{ "best_global_step": 2964, "best_metric": 0.2924116253852844, "best_model_checkpoint": "embeddinggemma-300m-pii-detector/checkpoint-2964", "epoch": 3.0, "eval_steps": 500, "global_step": 4446, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33738191632928477, "grad_norm": 17.098201751708984, "learning_rate": 4.438821412505623e-05, "loss": 0.5645, "num_input_tokens_seen": 512000, "step": 500, "train_runtime": 47.9743, "train_tokens_per_second": 10672.374 }, { "epoch": 0.6747638326585695, "grad_norm": 1.4570860862731934, "learning_rate": 3.876518218623482e-05, "loss": 0.3907, "num_input_tokens_seen": 1024000, "step": 1000, "train_runtime": 94.2729, "train_tokens_per_second": 10862.077 }, { "epoch": 1.0, "eval_accuracy": 0.8627318718381113, "eval_loss": 0.30147919058799744, "eval_runtime": 7.391, "eval_samples_per_second": 401.161, "eval_steps_per_second": 50.196, "num_input_tokens_seen": 1517568, "step": 1482 }, { "epoch": 1.0121457489878543, "grad_norm": 14.889694213867188, "learning_rate": 3.3142150247413403e-05, "loss": 0.3335, "num_input_tokens_seen": 1536000, "step": 1500, "train_runtime": 151.9538, "train_tokens_per_second": 10108.336 }, { "epoch": 1.349527665317139, "grad_norm": 0.4145456552505493, "learning_rate": 2.7519118308591997e-05, "loss": 0.3069, "num_input_tokens_seen": 2048000, "step": 2000, "train_runtime": 198.9267, "train_tokens_per_second": 10295.252 }, { "epoch": 1.686909581646424, "grad_norm": 49.79447555541992, "learning_rate": 2.1896086369770583e-05, "loss": 0.2847, "num_input_tokens_seen": 2560000, "step": 2500, "train_runtime": 245.6972, "train_tokens_per_second": 10419.327 }, { "epoch": 2.0, "eval_accuracy": 0.8978077571669477, "eval_loss": 0.2924116253852844, "eval_runtime": 7.3471, "eval_samples_per_second": 403.563, "eval_steps_per_second": 50.496, "num_input_tokens_seen": 3035136, "step": 2964 }, { "epoch": 2.0242914979757085, "grad_norm": 0.2802899181842804, "learning_rate": 1.627305443094917e-05, "loss": 0.2542, "num_input_tokens_seen": 3072000, "step": 3000, "train_runtime": 303.072, "train_tokens_per_second": 10136.207 }, { "epoch": 2.361673414304993, "grad_norm": 29.810983657836914, "learning_rate": 1.0650022492127757e-05, "loss": 0.2176, "num_input_tokens_seen": 3584000, "step": 3500, "train_runtime": 350.1674, "train_tokens_per_second": 10235.104 }, { "epoch": 2.699055330634278, "grad_norm": 14.638989448547363, "learning_rate": 5.026990553306343e-06, "loss": 0.2064, "num_input_tokens_seen": 4096000, "step": 4000, "train_runtime": 397.7303, "train_tokens_per_second": 10298.436 }, { "epoch": 3.0, "eval_accuracy": 0.8930860033726813, "eval_loss": 0.30713844299316406, "eval_runtime": 7.2761, "eval_samples_per_second": 407.498, "eval_steps_per_second": 50.989, "num_input_tokens_seen": 4552704, "step": 4446 }, { "epoch": 3.0, "num_input_tokens_seen": 4552704, "step": 4446, "total_flos": 2773636063690752.0, "train_loss": 0.30860748496937207, "train_runtime": 450.538, "train_samples_per_second": 78.946, "train_steps_per_second": 9.868 } ], "logging_steps": 500, "max_steps": 4446, "num_input_tokens_seen": 4552704, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2773636063690752.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }