| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5, | |
| "eval_steps": 3, | |
| "global_step": 60, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008333333333333333, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.1097, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.025, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.1424, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 1.4957225914582386, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 1.1714, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.075, | |
| "grad_norm": 1.0203233450225075, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 1.1459, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.1952212091375145, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1465, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 1.832578954318156, | |
| "learning_rate": 2.839090909090909e-06, | |
| "loss": 1.1517, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.0877339332216878, | |
| "learning_rate": 2.678181818181818e-06, | |
| "loss": 1.0991, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 1.4203314600042332, | |
| "learning_rate": 2.5172727272727275e-06, | |
| "loss": 1.1481, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 2.543863444651869, | |
| "learning_rate": 2.3563636363636366e-06, | |
| "loss": 1.151, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.225, | |
| "grad_norm": 2.2672694357791805, | |
| "learning_rate": 2.1954545454545456e-06, | |
| "loss": 1.1132, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.7570282713584158, | |
| "learning_rate": 2.0345454545454546e-06, | |
| "loss": 1.1043, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.275, | |
| "grad_norm": 0.7556695763204282, | |
| "learning_rate": 1.9272727272727273e-06, | |
| "loss": 1.0492, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.5250919282965105, | |
| "learning_rate": 1.8199999999999997e-06, | |
| "loss": 1.1206, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.325, | |
| "grad_norm": 2.402860972914852, | |
| "learning_rate": 1.659090909090909e-06, | |
| "loss": 1.0908, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 0.9250227812759156, | |
| "learning_rate": 1.4981818181818184e-06, | |
| "loss": 1.0982, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 1.0119045287556745, | |
| "learning_rate": 1.3372727272727274e-06, | |
| "loss": 1.1147, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.7972164809866773, | |
| "learning_rate": 1.1763636363636364e-06, | |
| "loss": 1.1004, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.425, | |
| "grad_norm": 1.4489209555210352, | |
| "learning_rate": 1.0154545454545454e-06, | |
| "loss": 1.0982, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.7531358827177462, | |
| "learning_rate": 8.545454545454544e-07, | |
| "loss": 1.0794, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.475, | |
| "grad_norm": 0.7368058065522421, | |
| "learning_rate": 6.936363636363635e-07, | |
| "loss": 1.0948, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.2350079863263905, | |
| "learning_rate": 5.327272727272729e-07, | |
| "loss": 1.1091, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "step": 60, | |
| "total_flos": 238563958456320.0, | |
| "train_loss": 1.1158998648325602, | |
| "train_runtime": 19061.2793, | |
| "train_samples_per_second": 0.402, | |
| "train_steps_per_second": 0.003 | |
| } | |
| ], | |
| "logging_steps": 3, | |
| "max_steps": 60, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 12, | |
| "total_flos": 238563958456320.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |