yg422 committed on
Commit
cc339c1
·
verified ·
1 Parent(s): 547d95f

Add a2/completion/checkpoint-2712

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  a2/completion/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  a2/completion/checkpoint-1356/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  a2/completion/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  a2/completion/checkpoint-1356/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ a2/completion/checkpoint-2712/tokenizer.json filter=lfs diff=lfs merge=lfs -text
a2/completion/checkpoint-2712/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
a2/completion/checkpoint-2712/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": null,
8
+ "embd_pdrop": 0.1,
9
+ "eos_token_id": 151645,
10
+ "initializer_range": 0.02,
11
+ "layer_norm_epsilon": 1e-05,
12
+ "model_type": "gpt2",
13
+ "n_embd": 1024,
14
+ "n_head": 16,
15
+ "n_inner": null,
16
+ "n_layer": 24,
17
+ "n_positions": 2048,
18
+ "pad_token_id": 151643,
19
+ "reorder_and_upcast_attn": false,
20
+ "resid_pdrop": 0.1,
21
+ "scale_attn_by_inverse_layer_idx": false,
22
+ "scale_attn_weights": true,
23
+ "summary_activation": null,
24
+ "summary_first_dropout": 0.1,
25
+ "summary_proj_to_labels": true,
26
+ "summary_type": "cls_index",
27
+ "summary_use_proj": true,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.53.0",
30
+ "use_cache": true,
31
+ "vocab_size": 151669
32
+ }
a2/completion/checkpoint-2712/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 151645,
4
+ "pad_token_id": 151643,
5
+ "transformers_version": "4.53.0"
6
+ }
a2/completion/checkpoint-2712/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
a2/completion/checkpoint-2712/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25ca46d7c51e4f3bb9f96da8b6928a307b6d442cac223281615ee2f46cacda12
3
+ size 1838900736
a2/completion/checkpoint-2712/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0dafc82bcf4b1c98b828570ab6f490dfe43149e03c929d7fc6122d37d104159
3
+ size 1383
a2/completion/checkpoint-2712/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18e17cf4cfe03b4b817120b532ba7e396d999864ee08f75f6c3dd964b1a2412f
3
+ size 1465
a2/completion/checkpoint-2712/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
a2/completion/checkpoint-2712/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574de68a0f63f2004784a421c7d42c2b2786c05cb38542d2ed3525757a1f7fde
3
+ size 11422932
a2/completion/checkpoint-2712/tokenizer_config.json ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "max_length": 512,
235
+ "model_max_length": 131072,
236
+ "pad_to_multiple_of": null,
237
+ "pad_token": "<|endoftext|>",
238
+ "pad_token_type_id": 0,
239
+ "padding_side": "right",
240
+ "split_special_tokens": false,
241
+ "stride": 0,
242
+ "tokenizer_class": "Qwen2Tokenizer",
243
+ "truncation_side": "right",
244
+ "truncation_strategy": "longest_first",
245
+ "unk_token": null
246
+ }
a2/completion/checkpoint-2712/trainer_state.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2712,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.07374631268436578,
14
+ "grad_norm": 2.3976473808288574,
15
+ "learning_rate": 4.926991150442478e-05,
16
+ "loss": 7.2626,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.14749262536873156,
21
+ "grad_norm": 3.085752487182617,
22
+ "learning_rate": 4.8532448377581126e-05,
23
+ "loss": 5.118,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.22123893805309736,
28
+ "grad_norm": 3.1417922973632812,
29
+ "learning_rate": 4.7794985250737464e-05,
30
+ "loss": 4.6592,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.2949852507374631,
35
+ "grad_norm": 3.0027577877044678,
36
+ "learning_rate": 4.705752212389381e-05,
37
+ "loss": 4.3722,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.3687315634218289,
42
+ "grad_norm": 2.5502541065216064,
43
+ "learning_rate": 4.632005899705015e-05,
44
+ "loss": 4.1234,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.4424778761061947,
49
+ "grad_norm": 2.6267709732055664,
50
+ "learning_rate": 4.558259587020649e-05,
51
+ "loss": 3.9828,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.5162241887905604,
56
+ "grad_norm": 2.364206314086914,
57
+ "learning_rate": 4.484513274336283e-05,
58
+ "loss": 3.8663,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.5899705014749262,
63
+ "grad_norm": 2.8979170322418213,
64
+ "learning_rate": 4.410766961651918e-05,
65
+ "loss": 3.7611,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.6637168141592921,
70
+ "grad_norm": 2.5105957984924316,
71
+ "learning_rate": 4.337020648967552e-05,
72
+ "loss": 3.618,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.7374631268436578,
77
+ "grad_norm": 2.793271780014038,
78
+ "learning_rate": 4.263274336283186e-05,
79
+ "loss": 3.5201,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.8112094395280236,
84
+ "grad_norm": 2.612252950668335,
85
+ "learning_rate": 4.189528023598821e-05,
86
+ "loss": 3.4934,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.8849557522123894,
91
+ "grad_norm": 2.8699395656585693,
92
+ "learning_rate": 4.1157817109144546e-05,
93
+ "loss": 3.4027,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.9587020648967551,
98
+ "grad_norm": 2.758632183074951,
99
+ "learning_rate": 4.0420353982300885e-05,
100
+ "loss": 3.3962,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 1.0,
105
+ "eval_loss": 3.288789987564087,
106
+ "eval_runtime": 18.9315,
107
+ "eval_samples_per_second": 127.354,
108
+ "eval_steps_per_second": 7.976,
109
+ "step": 1356
110
+ },
111
+ {
112
+ "epoch": 1.0324483775811208,
113
+ "grad_norm": 2.836648464202881,
114
+ "learning_rate": 3.968289085545723e-05,
115
+ "loss": 3.2942,
116
+ "step": 1400
117
+ },
118
+ {
119
+ "epoch": 1.1061946902654867,
120
+ "grad_norm": 2.569132089614868,
121
+ "learning_rate": 3.894542772861357e-05,
122
+ "loss": 3.1295,
123
+ "step": 1500
124
+ },
125
+ {
126
+ "epoch": 1.1799410029498525,
127
+ "grad_norm": 2.712193250656128,
128
+ "learning_rate": 3.8207964601769915e-05,
129
+ "loss": 3.0838,
130
+ "step": 1600
131
+ },
132
+ {
133
+ "epoch": 1.2536873156342183,
134
+ "grad_norm": 2.9676578044891357,
135
+ "learning_rate": 3.747050147492625e-05,
136
+ "loss": 3.0715,
137
+ "step": 1700
138
+ },
139
+ {
140
+ "epoch": 1.3274336283185841,
141
+ "grad_norm": 2.5203654766082764,
142
+ "learning_rate": 3.67330383480826e-05,
143
+ "loss": 3.0622,
144
+ "step": 1800
145
+ },
146
+ {
147
+ "epoch": 1.4011799410029497,
148
+ "grad_norm": 2.3672962188720703,
149
+ "learning_rate": 3.5995575221238944e-05,
150
+ "loss": 2.9971,
151
+ "step": 1900
152
+ },
153
+ {
154
+ "epoch": 1.4749262536873156,
155
+ "grad_norm": 2.5039596557617188,
156
+ "learning_rate": 3.5258112094395276e-05,
157
+ "loss": 2.9881,
158
+ "step": 2000
159
+ },
160
+ {
161
+ "epoch": 1.5486725663716814,
162
+ "grad_norm": 2.587226152420044,
163
+ "learning_rate": 3.452064896755162e-05,
164
+ "loss": 2.915,
165
+ "step": 2100
166
+ },
167
+ {
168
+ "epoch": 1.6224188790560472,
169
+ "grad_norm": 2.8577704429626465,
170
+ "learning_rate": 3.378318584070797e-05,
171
+ "loss": 2.9573,
172
+ "step": 2200
173
+ },
174
+ {
175
+ "epoch": 1.696165191740413,
176
+ "grad_norm": 2.520150899887085,
177
+ "learning_rate": 3.3045722713864306e-05,
178
+ "loss": 2.9095,
179
+ "step": 2300
180
+ },
181
+ {
182
+ "epoch": 1.7699115044247788,
183
+ "grad_norm": 2.763072967529297,
184
+ "learning_rate": 3.230825958702065e-05,
185
+ "loss": 2.8751,
186
+ "step": 2400
187
+ },
188
+ {
189
+ "epoch": 1.8436578171091447,
190
+ "grad_norm": 2.539698600769043,
191
+ "learning_rate": 3.1570796460176996e-05,
192
+ "loss": 2.8752,
193
+ "step": 2500
194
+ },
195
+ {
196
+ "epoch": 1.9174041297935103,
197
+ "grad_norm": 2.599785327911377,
198
+ "learning_rate": 3.0833333333333335e-05,
199
+ "loss": 2.8176,
200
+ "step": 2600
201
+ },
202
+ {
203
+ "epoch": 1.991150442477876,
204
+ "grad_norm": 2.660834789276123,
205
+ "learning_rate": 3.009587020648968e-05,
206
+ "loss": 2.8026,
207
+ "step": 2700
208
+ },
209
+ {
210
+ "epoch": 2.0,
211
+ "eval_loss": 2.860244035720825,
212
+ "eval_runtime": 18.9481,
213
+ "eval_samples_per_second": 127.242,
214
+ "eval_steps_per_second": 7.969,
215
+ "step": 2712
216
+ }
217
+ ],
218
+ "logging_steps": 100,
219
+ "max_steps": 6780,
220
+ "num_input_tokens_seen": 0,
221
+ "num_train_epochs": 5,
222
+ "save_steps": 500,
223
+ "stateful_callbacks": {
224
+ "TrainerControl": {
225
+ "args": {
226
+ "should_epoch_stop": false,
227
+ "should_evaluate": false,
228
+ "should_log": false,
229
+ "should_save": true,
230
+ "should_training_stop": false
231
+ },
232
+ "attributes": {}
233
+ }
234
+ },
235
+ "total_flos": 4.029818053695898e+16,
236
+ "train_batch_size": 16,
237
+ "trial_name": null,
238
+ "trial_params": null
239
+ }
a2/completion/checkpoint-2712/vocab.json ADDED
The diff for this file is too large to render. See raw diff