{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9912023460410557,
  "eval_steps": 10,
  "global_step": 340,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05865102639296188,
      "eval_loss": 0.38869303464889526,
      "eval_runtime": 33.6966,
      "eval_samples_per_second": 22.465,
      "eval_steps_per_second": 5.639,
      "step": 10
    },
    {
      "epoch": 0.11730205278592376,
      "grad_norm": 1.1477874212327048,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.4096,
      "step": 20
    },
    {
      "epoch": 0.11730205278592376,
      "eval_loss": 0.33711880445480347,
      "eval_runtime": 32.8785,
      "eval_samples_per_second": 23.024,
      "eval_steps_per_second": 5.779,
      "step": 20
    },
    {
      "epoch": 0.17595307917888564,
      "eval_loss": 0.2933129668235779,
      "eval_runtime": 32.8713,
      "eval_samples_per_second": 23.029,
      "eval_steps_per_second": 5.78,
      "step": 30
    },
    {
      "epoch": 0.23460410557184752,
      "grad_norm": 1.121589060878037,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.3048,
      "step": 40
    },
    {
      "epoch": 0.23460410557184752,
      "eval_loss": 0.25960347056388855,
      "eval_runtime": 32.9112,
      "eval_samples_per_second": 23.001,
      "eval_steps_per_second": 5.773,
      "step": 40
    },
    {
      "epoch": 0.2932551319648094,
      "eval_loss": 0.24025067687034607,
      "eval_runtime": 32.9388,
      "eval_samples_per_second": 22.982,
      "eval_steps_per_second": 5.768,
      "step": 50
    },
    {
      "epoch": 0.3519061583577713,
      "grad_norm": 1.0677324544458529,
      "learning_rate": 9.990516643685222e-06,
      "loss": 0.2471,
      "step": 60
    },
    {
      "epoch": 0.3519061583577713,
      "eval_loss": 0.228533536195755,
      "eval_runtime": 32.9199,
      "eval_samples_per_second": 22.995,
      "eval_steps_per_second": 5.772,
      "step": 60
    },
    {
      "epoch": 0.41055718475073316,
      "eval_loss": 0.21884050965309143,
      "eval_runtime": 32.9261,
      "eval_samples_per_second": 22.991,
      "eval_steps_per_second": 5.77,
      "step": 70
    },
    {
      "epoch": 0.46920821114369504,
      "grad_norm": 0.8221117546113363,
      "learning_rate": 9.901828808578846e-06,
      "loss": 0.2281,
      "step": 80
    },
    {
      "epoch": 0.46920821114369504,
      "eval_loss": 0.21087835729122162,
      "eval_runtime": 32.9118,
      "eval_samples_per_second": 23.001,
      "eval_steps_per_second": 5.773,
      "step": 80
    },
    {
      "epoch": 0.5278592375366569,
      "eval_loss": 0.2049574851989746,
      "eval_runtime": 33.0853,
      "eval_samples_per_second": 22.88,
      "eval_steps_per_second": 5.743,
      "step": 90
    },
    {
      "epoch": 0.5865102639296188,
      "grad_norm": 0.7863445026416022,
      "learning_rate": 9.721431493385322e-06,
      "loss": 0.2073,
      "step": 100
    },
    {
      "epoch": 0.5865102639296188,
      "eval_loss": 0.20104646682739258,
      "eval_runtime": 33.0368,
      "eval_samples_per_second": 22.914,
      "eval_steps_per_second": 5.751,
      "step": 100
    },
    {
      "epoch": 0.6451612903225806,
      "eval_loss": 0.19683966040611267,
      "eval_runtime": 32.895,
      "eval_samples_per_second": 23.013,
      "eval_steps_per_second": 5.776,
      "step": 110
    },
    {
      "epoch": 0.7038123167155426,
      "grad_norm": 0.7460954708103601,
      "learning_rate": 9.452699794345583e-06,
      "loss": 0.1911,
      "step": 120
    },
    {
      "epoch": 0.7038123167155426,
      "eval_loss": 0.1945473700761795,
      "eval_runtime": 32.8944,
      "eval_samples_per_second": 23.013,
      "eval_steps_per_second": 5.776,
      "step": 120
    },
    {
      "epoch": 0.7624633431085044,
      "eval_loss": 0.1912163645029068,
      "eval_runtime": 32.9495,
      "eval_samples_per_second": 22.975,
      "eval_steps_per_second": 5.766,
      "step": 130
    },
    {
      "epoch": 0.8211143695014663,
      "grad_norm": 0.7504426793167469,
      "learning_rate": 9.100661476680379e-06,
      "loss": 0.1876,
      "step": 140
    },
    {
      "epoch": 0.8211143695014663,
      "eval_loss": 0.18776217103004456,
      "eval_runtime": 32.9203,
      "eval_samples_per_second": 22.995,
      "eval_steps_per_second": 5.772,
      "step": 140
    },
    {
      "epoch": 0.8797653958944281,
      "eval_loss": 0.18520714342594147,
      "eval_runtime": 32.8973,
      "eval_samples_per_second": 23.011,
      "eval_steps_per_second": 5.776,
      "step": 150
    },
    {
      "epoch": 0.9384164222873901,
      "grad_norm": 0.7748459339656144,
      "learning_rate": 8.671902908935942e-06,
      "loss": 0.1887,
      "step": 160
    },
    {
      "epoch": 0.9384164222873901,
      "eval_loss": 0.18254177272319794,
      "eval_runtime": 32.8779,
      "eval_samples_per_second": 23.025,
      "eval_steps_per_second": 5.779,
      "step": 160
    },
    {
      "epoch": 0.9970674486803519,
      "eval_loss": 0.18051140010356903,
      "eval_runtime": 32.9923,
      "eval_samples_per_second": 22.945,
      "eval_steps_per_second": 5.759,
      "step": 170
    },
    {
      "epoch": 1.0527859237536656,
      "grad_norm": 0.9218927864042982,
      "learning_rate": 8.174445837049614e-06,
      "loss": 0.1553,
      "step": 180
    },
    {
      "epoch": 1.0527859237536656,
      "eval_loss": 0.18640676140785217,
      "eval_runtime": 32.9183,
      "eval_samples_per_second": 22.996,
      "eval_steps_per_second": 5.772,
      "step": 180
    },
    {
      "epoch": 1.1114369501466275,
      "eval_loss": 0.18077336251735687,
      "eval_runtime": 32.8676,
      "eval_samples_per_second": 23.032,
      "eval_steps_per_second": 5.781,
      "step": 190
    },
    {
      "epoch": 1.1700879765395895,
      "grad_norm": 0.9109052804191333,
      "learning_rate": 7.617597303598754e-06,
      "loss": 0.1332,
      "step": 200
    },
    {
      "epoch": 1.1700879765395895,
      "eval_loss": 0.1823471635580063,
      "eval_runtime": 32.9625,
      "eval_samples_per_second": 22.965,
      "eval_steps_per_second": 5.764,
      "step": 200
    },
    {
      "epoch": 1.2287390029325513,
      "eval_loss": 0.17943565547466278,
      "eval_runtime": 32.9683,
      "eval_samples_per_second": 22.961,
      "eval_steps_per_second": 5.763,
      "step": 210
    },
    {
      "epoch": 1.2873900293255132,
      "grad_norm": 0.8287150136550924,
      "learning_rate": 7.011775520129363e-06,
      "loss": 0.1349,
      "step": 220
    },
    {
      "epoch": 1.2873900293255132,
      "eval_loss": 0.17819999158382416,
      "eval_runtime": 32.9652,
      "eval_samples_per_second": 22.964,
      "eval_steps_per_second": 5.764,
      "step": 220
    },
    {
      "epoch": 1.3460410557184752,
      "eval_loss": 0.1771620512008667,
      "eval_runtime": 32.8691,
      "eval_samples_per_second": 23.031,
      "eval_steps_per_second": 5.781,
      "step": 230
    },
    {
      "epoch": 1.404692082111437,
      "grad_norm": 0.7173372889400553,
      "learning_rate": 6.368314950360416e-06,
      "loss": 0.1333,
      "step": 240
    },
    {
      "epoch": 1.404692082111437,
      "eval_loss": 0.17452633380889893,
      "eval_runtime": 32.8914,
      "eval_samples_per_second": 23.015,
      "eval_steps_per_second": 5.777,
      "step": 240
    },
    {
      "epoch": 1.4633431085043989,
      "eval_loss": 0.17504557967185974,
      "eval_runtime": 32.8301,
      "eval_samples_per_second": 23.058,
      "eval_steps_per_second": 5.787,
      "step": 250
    },
    {
      "epoch": 1.5219941348973607,
      "grad_norm": 0.874094982171954,
      "learning_rate": 5.699254251008524e-06,
      "loss": 0.1338,
      "step": 260
    },
    {
      "epoch": 1.5219941348973607,
      "eval_loss": 0.17338429391384125,
      "eval_runtime": 32.8983,
      "eval_samples_per_second": 23.01,
      "eval_steps_per_second": 5.775,
      "step": 260
    },
    {
      "epoch": 1.5806451612903225,
      "eval_loss": 0.171478271484375,
      "eval_runtime": 32.8244,
      "eval_samples_per_second": 23.062,
      "eval_steps_per_second": 5.788,
      "step": 270
    },
    {
      "epoch": 1.6392961876832843,
      "grad_norm": 0.7979095239427625,
      "learning_rate": 5.017111037698477e-06,
      "loss": 0.1267,
      "step": 280
    },
    {
      "epoch": 1.6392961876832843,
      "eval_loss": 0.17220577597618103,
      "eval_runtime": 32.7725,
      "eval_samples_per_second": 23.099,
      "eval_steps_per_second": 5.798,
      "step": 280
    },
    {
      "epoch": 1.6979472140762464,
      "eval_loss": 0.16855686902999878,
      "eval_runtime": 32.8059,
      "eval_samples_per_second": 23.075,
      "eval_steps_per_second": 5.792,
      "step": 290
    },
    {
      "epoch": 1.7565982404692082,
      "grad_norm": 0.705233520985163,
      "learning_rate": 4.334647689917734e-06,
      "loss": 0.1317,
      "step": 300
    },
    {
      "epoch": 1.7565982404692082,
      "eval_loss": 0.1680660992860794,
      "eval_runtime": 32.9544,
      "eval_samples_per_second": 22.971,
      "eval_steps_per_second": 5.766,
      "step": 300
    },
    {
      "epoch": 1.8152492668621703,
      "eval_loss": 0.16704507172107697,
      "eval_runtime": 32.8838,
      "eval_samples_per_second": 23.02,
      "eval_steps_per_second": 5.778,
      "step": 310
    },
    {
      "epoch": 1.873900293255132,
      "grad_norm": 0.7535701160217406,
      "learning_rate": 3.6646325766256423e-06,
      "loss": 0.1251,
      "step": 320
    },
    {
      "epoch": 1.873900293255132,
      "eval_loss": 0.16622412204742432,
      "eval_runtime": 32.8893,
      "eval_samples_per_second": 23.017,
      "eval_steps_per_second": 5.777,
      "step": 320
    },
    {
      "epoch": 1.932551319648094,
      "eval_loss": 0.16477644443511963,
      "eval_runtime": 33.006,
      "eval_samples_per_second": 22.935,
      "eval_steps_per_second": 5.757,
      "step": 330
    },
    {
      "epoch": 1.9912023460410557,
      "grad_norm": 0.7373647805377234,
      "learning_rate": 3.019601169804216e-06,
      "loss": 0.1186,
      "step": 340
    },
    {
      "epoch": 1.9912023460410557,
      "eval_loss": 0.1639869511127472,
      "eval_runtime": 32.9932,
      "eval_samples_per_second": 22.944,
      "eval_steps_per_second": 5.759,
      "step": 340
    }
  ],
  "logging_steps": 20,
  "max_steps": 510,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 20,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 31189214167040.0,
  "train_batch_size": 5,
  "trial_name": null,
  "trial_params": null
}