Joosep Pata commited on
Commit
b278362
·
1 Parent(s): 8cd2762

added gnnlsh training

Browse files
Files changed (21) hide show
  1. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/.gitattributes +4 -0
  2. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch1.parquet +3 -0
  3. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch2.parquet +3 -0
  4. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/checkpoints/checkpoint-02-5.206203.pth +3 -0
  5. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_1.json +1 -0
  6. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_2.json +1 -0
  7. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/hyperparameters.json +1 -0
  8. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/model_kwargs.pkl +3 -0
  9. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/overridden_config.yaml +225 -0
  10. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet +3 -0
  11. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet +3 -0
  12. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet +3 -0
  13. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet +3 -0
  14. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet +3 -0
  15. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet +3 -0
  16. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet +3 -0
  17. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet +3 -0
  18. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/train/events.out.tfevents.1738649783.gpu1.local.3067131.0 +3 -0
  19. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/valid/events.out.tfevents.1738649783.gpu1.local.3067131.1 +3 -0
  20. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train-config.yaml +225 -0
  21. clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train.log +811 -0
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/.gitattributes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ plots_checkpoint*/** filter=lfs diff=lfs merge=lfs -text
2
+ preds_checkpoint*/** filter=lfs diff=lfs merge=lfs -text
3
+ runs/** filter=lfs diff=lfs merge=lfs -text
4
+ checkpoints/** filter=lfs diff=lfs merge=lfs -text
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch1.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c224dffffac28a746c270f295e120fc532d4312d69f247eba33436a4db4eb6fd
3
+ size 2531094
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch2.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48d0d88b9b1f2c78af292b318dc9e592fbe600ab2b1100cbf86b9301a1ea0341
3
+ size 2537425
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/checkpoints/checkpoint-02-5.206203.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0afdd229f9b76f39ad18bb1bbf8e11e8dafe292580a59340392f4c5b75eb84
3
+ size 643435404
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_1.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train": {"Regression_pt": 0.25910334646839267, "Regression_eta": 0.0008618684659893994, "Regression_sin_phi": 0.0006182023768865629, "Regression_cos_phi": 0.0006280264262564951, "Regression_energy": 0.24251854639915127, "Classification_binary": 4.559183174642798, "Classification": 0.05994237113451846, "ispu": 8.946376891725996e-06, "MET": 21.453700401778285, "Sliced_Wasserstein_Loss": 65.35367781660842, "Total": 5.122855523262725}, "valid": {"Regression_pt": 0.2624917096143816, "Regression_eta": 0.000599030926991009, "Regression_sin_phi": 0.0005111121437543724, "Regression_cos_phi": 0.0004291858879196283, "Regression_energy": 0.25430431388413394, "Classification_binary": 4.326086829330267, "Classification": 0.05409432865533393, "ispu": 0.0, "MET": 15.170460776184871, "Sliced_Wasserstein_Loss": 61.53729137691238, "Total": 4.898515232160051}, "epoch_train_time": 166139.00810337067, "epoch_valid_time": 1447.8257393836975, "epoch_total_time": 167586.83384537697}
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_2.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train": {"Regression_pt": 0.25380500213221835, "Regression_eta": 0.0005493453817338307, "Regression_sin_phi": 0.00038346989650266973, "Regression_cos_phi": 0.0004457221137636458, "Regression_energy": 0.2443389649528683, "Classification_binary": 4.283395962603714, "Classification": 0.05482413586651273, "ispu": 0.0, "MET": 18.887216793856833, "Sliced_Wasserstein_Loss": 62.04546156955043, "Total": 4.837748213988541}, "valid": {"Regression_pt": 0.26054830113438804, "Regression_eta": 0.0005653613874003429, "Regression_sin_phi": 0.00036033755561891466, "Regression_cos_phi": 0.0005135214289229248, "Regression_energy": 0.2509168529173799, "Classification_binary": 4.6393808675243395, "Classification": 0.05391483423198954, "ispu": 0.0, "MET": 22.03946286241575, "Sliced_Wasserstein_Loss": 63.26808066759388, "Total": 5.206203360703969}, "epoch_train_time": 166185.94912409782, "epoch_valid_time": 1453.421977519989, "epoch_total_time": 167639.3711025715}
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/hyperparameters.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"num_mlpf_params": 53574931, "train": true, "test": null, "make_plots": null, "comet": true, "save_attention": true, "dataset": "clic", "sort_data": false, "data_dir": "/scratch/persistent/joosep/tensorflow_datasets", "gpus": 1, "gpu_batch_multiplier": 256, "load": null, "finetune": null, "num_epochs": 10, "patience": 20, "lr": 0.0001, "lr_schedule": "cosinedecay", "conv_type": "gnn_lsh", "ntrain": null, "ntest": 2000, "nvalid": null, "num_workers": 8, "prefetch_factor": 100, "checkpoint_freq": 1, "comet_name": "particleflow-pt", "comet_offline": false, "comet_step_freq": 100, "dtype": "bfloat16", "val_freq": null, "model": {"trainable": "all", "learned_representation_mode": "last", "input_encoding": "split", "pt_mode": "direct-elemtype-split", "eta_mode": "linear", "sin_phi_mode": "linear", "cos_phi_mode": "linear", "energy_mode": "direct-elemtype-split", "gnn_lsh": {"conv_type": "gnn_lsh", "embedding_dim": 1024, "width": 1024, "num_convs": 3, "activation": "elu", "bin_size": 32, "max_num_bins": 200, "distance_dim": 128, "layernorm": true, "num_node_messages": 2, "ffn_dist_hidden_dim": 128, "ffn_dist_num_layers": 2}, "attention": {"conv_type": "attention", "num_convs": 3, "dropout_ff": 0.0, "dropout_conv_id_mha": 0.0, "dropout_conv_id_ff": 0.0, "dropout_conv_reg_mha": 0.0, "dropout_conv_reg_ff": 0.0, "activation": "relu", "head_dim": 32, "num_heads": 32, "attention_type": "flash", "use_pre_layernorm": true}, "mamba": {"conv_type": "mamba", "embedding_dim": 128, "width": 128, "num_convs": 2, "dropout": 0.0, "activation": "elu", "num_heads": 2, "d_state": 16, "d_conv": 4, "expand": 2}}, "lr_schedule_config": {"onecycle": {"pct_start": 0.3}}, "raytune": {"local_dir": null, "sched": null, "search_alg": null, "default_metric": "val_loss", "default_mode": "min", "asha": {"max_t": 200, "reduction_factor": 4, "brackets": 1, "grace_period": 10}, "hyperband": {"max_t": 200, "reduction_factor": 4}, "hyperopt": {"n_random_steps": 10}, "nevergrad": {"n_random_steps": 10}}, "train_dataset": {"clic": {"physical": {"batch_size": 1, "samples": {"clic_edm_qq_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ttbar_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ww_fullhad_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}}}}}, "valid_dataset": {"clic": {"physical": {"batch_size": 1, "samples": {"clic_edm_qq_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ttbar_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ww_fullhad_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}}}}}, "test_dataset": {"clic_edm_qq_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ttbar_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ww_fullhad_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}}, "enabled_test_datasets": ["clic_edm_qq_pf"]}
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/model_kwargs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544045d26763d5a9d2fb2954cd23771043b9263e2c82abaa00210b34553bf24e
3
+ size 491
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/overridden_config.yaml ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint_freq: 1
2
+ comet: true
3
+ comet_name: particleflow-pt
4
+ comet_offline: false
5
+ comet_step_freq: 100
6
+ conv_type: gnn_lsh
7
+ data_dir: /scratch/persistent/joosep/tensorflow_datasets
8
+ dataset: clic
9
+ dtype: bfloat16
10
+ enabled_test_datasets:
11
+ - clic_edm_qq_pf
12
+ finetune: null
13
+ gpu_batch_multiplier: 256
14
+ gpus: 1
15
+ load: null
16
+ lr: 0.0001
17
+ lr_schedule: cosinedecay
18
+ lr_schedule_config:
19
+ onecycle:
20
+ pct_start: 0.3
21
+ make_plots: null
22
+ model:
23
+ attention:
24
+ activation: relu
25
+ attention_type: flash
26
+ conv_type: attention
27
+ dropout_conv_id_ff: 0.0
28
+ dropout_conv_id_mha: 0.0
29
+ dropout_conv_reg_ff: 0.0
30
+ dropout_conv_reg_mha: 0.0
31
+ dropout_ff: 0.0
32
+ head_dim: 32
33
+ num_convs: 3
34
+ num_heads: 32
35
+ use_pre_layernorm: true
36
+ cos_phi_mode: linear
37
+ energy_mode: direct-elemtype-split
38
+ eta_mode: linear
39
+ gnn_lsh:
40
+ activation: elu
41
+ bin_size: 32
42
+ conv_type: gnn_lsh
43
+ distance_dim: 128
44
+ embedding_dim: 1024
45
+ ffn_dist_hidden_dim: 128
46
+ ffn_dist_num_layers: 2
47
+ layernorm: true
48
+ max_num_bins: 200
49
+ num_convs: 3
50
+ num_node_messages: 2
51
+ width: 1024
52
+ input_encoding: split
53
+ learned_representation_mode: last
54
+ mamba:
55
+ activation: elu
56
+ conv_type: mamba
57
+ d_conv: 4
58
+ d_state: 16
59
+ dropout: 0.0
60
+ embedding_dim: 128
61
+ expand: 2
62
+ num_convs: 2
63
+ num_heads: 2
64
+ width: 128
65
+ pt_mode: direct-elemtype-split
66
+ sin_phi_mode: linear
67
+ trainable: all
68
+ ntest: 2000
69
+ ntrain: null
70
+ num_epochs: 10
71
+ num_workers: 8
72
+ nvalid: null
73
+ patience: 20
74
+ prefetch_factor: 100
75
+ raytune:
76
+ asha:
77
+ brackets: 1
78
+ grace_period: 10
79
+ max_t: 200
80
+ reduction_factor: 4
81
+ default_metric: val_loss
82
+ default_mode: min
83
+ hyperband:
84
+ max_t: 200
85
+ reduction_factor: 4
86
+ hyperopt:
87
+ n_random_steps: 10
88
+ local_dir: null
89
+ nevergrad:
90
+ n_random_steps: 10
91
+ sched: null
92
+ search_alg: null
93
+ save_attention: true
94
+ sort_data: false
95
+ test: null
96
+ test_dataset:
97
+ clic_edm_qq_pf:
98
+ splits:
99
+ - 1
100
+ - 2
101
+ - 3
102
+ - 4
103
+ - 5
104
+ - 6
105
+ - 7
106
+ - 8
107
+ - 9
108
+ - 10
109
+ version: 2.5.0
110
+ clic_edm_ttbar_pf:
111
+ splits:
112
+ - 1
113
+ - 2
114
+ - 3
115
+ - 4
116
+ - 5
117
+ - 6
118
+ - 7
119
+ - 8
120
+ - 9
121
+ - 10
122
+ version: 2.5.0
123
+ clic_edm_ww_fullhad_pf:
124
+ splits:
125
+ - 1
126
+ - 2
127
+ - 3
128
+ - 4
129
+ - 5
130
+ - 6
131
+ - 7
132
+ - 8
133
+ - 9
134
+ - 10
135
+ version: 2.5.0
136
+ train: true
137
+ train_dataset:
138
+ clic:
139
+ physical:
140
+ batch_size: 1
141
+ samples:
142
+ clic_edm_qq_pf:
143
+ splits:
144
+ - 1
145
+ - 2
146
+ - 3
147
+ - 4
148
+ - 5
149
+ - 6
150
+ - 7
151
+ - 8
152
+ - 9
153
+ - 10
154
+ version: 2.5.0
155
+ clic_edm_ttbar_pf:
156
+ splits:
157
+ - 1
158
+ - 2
159
+ - 3
160
+ - 4
161
+ - 5
162
+ - 6
163
+ - 7
164
+ - 8
165
+ - 9
166
+ - 10
167
+ version: 2.5.0
168
+ clic_edm_ww_fullhad_pf:
169
+ splits:
170
+ - 1
171
+ - 2
172
+ - 3
173
+ - 4
174
+ - 5
175
+ - 6
176
+ - 7
177
+ - 8
178
+ - 9
179
+ - 10
180
+ version: 2.5.0
181
+ val_freq: null
182
+ valid_dataset:
183
+ clic:
184
+ physical:
185
+ batch_size: 1
186
+ samples:
187
+ clic_edm_qq_pf:
188
+ splits:
189
+ - 1
190
+ - 2
191
+ - 3
192
+ - 4
193
+ - 5
194
+ - 6
195
+ - 7
196
+ - 8
197
+ - 9
198
+ - 10
199
+ version: 2.5.0
200
+ clic_edm_ttbar_pf:
201
+ splits:
202
+ - 1
203
+ - 2
204
+ - 3
205
+ - 4
206
+ - 5
207
+ - 6
208
+ - 7
209
+ - 8
210
+ - 9
211
+ - 10
212
+ version: 2.5.0
213
+ clic_edm_ww_fullhad_pf:
214
+ splits:
215
+ - 1
216
+ - 2
217
+ - 3
218
+ - 4
219
+ - 5
220
+ - 6
221
+ - 7
222
+ - 8
223
+ - 9
224
+ - 10
225
+ version: 2.5.0
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da42d7ce3c2811160b8875dd27f3e5803ad73e35975e248782ad3a4cc1687d9
3
+ size 3697789
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75fde31886b73991fdd1b030b8362ce82279384e4f9ca530417dc0ed40004c75
3
+ size 3924659
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e2de2441b7d8537b28dec48de817446a0e0e9e3ebc3a860dbacb14d9acdd5d
3
+ size 3749584
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a81cdb9200535c1700c532b35ee42bf6ada7e0e62f99d938d42245eb949aa083
3
+ size 3892284
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cc2b7e0511ef25f6ba143d13dc7a1d0937f2860b23ff5aee556514524cc2acf
3
+ size 3958748
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b63a7435d52925ae73e12b73434262b48478a85fd7c3c33408c723172aa516
3
+ size 3869607
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb2b7796dbc4e7365c3a70477e1686c063506f7982d6656491563636367bc82
3
+ size 3644752
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc53e9fda0a37737823dc83ad99127ee2de7d7b1772e8f4dd48fe520095bcc78
3
+ size 3117659
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/train/events.out.tfevents.1738649783.gpu1.local.3067131.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbe281fada73723faad162ab2aabcbeb738ef28af2f43bfac9e527b0a2f8ead5
3
+ size 1047551
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/valid/events.out.tfevents.1738649783.gpu1.local.3067131.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcce84bbda1cc05e167ec6fff5cf3488c1f7593c3f0939ea8c1d21442010539
3
+ size 966360
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train-config.yaml ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint_freq: 1
2
+ comet: true
3
+ comet_name: particleflow-pt
4
+ comet_offline: false
5
+ comet_step_freq: 100
6
+ conv_type: gnn_lsh
7
+ data_dir: /scratch/persistent/joosep/tensorflow_datasets
8
+ dataset: clic
9
+ dtype: bfloat16
10
+ enabled_test_datasets:
11
+ - clic_edm_qq_pf
12
+ finetune: null
13
+ gpu_batch_multiplier: 256
14
+ gpus: 1
15
+ load: null
16
+ lr: 0.0001
17
+ lr_schedule: cosinedecay
18
+ lr_schedule_config:
19
+ onecycle:
20
+ pct_start: 0.3
21
+ make_plots: null
22
+ model:
23
+ attention:
24
+ activation: relu
25
+ attention_type: flash
26
+ conv_type: attention
27
+ dropout_conv_id_ff: 0.0
28
+ dropout_conv_id_mha: 0.0
29
+ dropout_conv_reg_ff: 0.0
30
+ dropout_conv_reg_mha: 0.0
31
+ dropout_ff: 0.0
32
+ head_dim: 32
33
+ num_convs: 3
34
+ num_heads: 32
35
+ use_pre_layernorm: true
36
+ cos_phi_mode: linear
37
+ energy_mode: direct-elemtype-split
38
+ eta_mode: linear
39
+ gnn_lsh:
40
+ activation: elu
41
+ bin_size: 32
42
+ conv_type: gnn_lsh
43
+ distance_dim: 128
44
+ embedding_dim: 1024
45
+ ffn_dist_hidden_dim: 128
46
+ ffn_dist_num_layers: 2
47
+ layernorm: true
48
+ max_num_bins: 200
49
+ num_convs: 3
50
+ num_node_messages: 2
51
+ width: 1024
52
+ input_encoding: split
53
+ learned_representation_mode: last
54
+ mamba:
55
+ activation: elu
56
+ conv_type: mamba
57
+ d_conv: 4
58
+ d_state: 16
59
+ dropout: 0.0
60
+ embedding_dim: 128
61
+ expand: 2
62
+ num_convs: 2
63
+ num_heads: 2
64
+ width: 128
65
+ pt_mode: direct-elemtype-split
66
+ sin_phi_mode: linear
67
+ trainable: all
68
+ ntest: 2000
69
+ ntrain: null
70
+ num_epochs: 10
71
+ num_workers: 8
72
+ nvalid: null
73
+ patience: 20
74
+ prefetch_factor: 100
75
+ raytune:
76
+ asha:
77
+ brackets: 1
78
+ grace_period: 10
79
+ max_t: 200
80
+ reduction_factor: 4
81
+ default_metric: val_loss
82
+ default_mode: min
83
+ hyperband:
84
+ max_t: 200
85
+ reduction_factor: 4
86
+ hyperopt:
87
+ n_random_steps: 10
88
+ local_dir: null
89
+ nevergrad:
90
+ n_random_steps: 10
91
+ sched: null
92
+ search_alg: null
93
+ save_attention: true
94
+ sort_data: false
95
+ test: null
96
+ test_dataset:
97
+ clic_edm_qq_pf:
98
+ splits:
99
+ - 1
100
+ - 2
101
+ - 3
102
+ - 4
103
+ - 5
104
+ - 6
105
+ - 7
106
+ - 8
107
+ - 9
108
+ - 10
109
+ version: 2.5.0
110
+ clic_edm_ttbar_pf:
111
+ splits:
112
+ - 1
113
+ - 2
114
+ - 3
115
+ - 4
116
+ - 5
117
+ - 6
118
+ - 7
119
+ - 8
120
+ - 9
121
+ - 10
122
+ version: 2.5.0
123
+ clic_edm_ww_fullhad_pf:
124
+ splits:
125
+ - 1
126
+ - 2
127
+ - 3
128
+ - 4
129
+ - 5
130
+ - 6
131
+ - 7
132
+ - 8
133
+ - 9
134
+ - 10
135
+ version: 2.5.0
136
+ train: true
137
+ train_dataset:
138
+ clic:
139
+ physical:
140
+ batch_size: 1
141
+ samples:
142
+ clic_edm_qq_pf:
143
+ splits:
144
+ - 1
145
+ - 2
146
+ - 3
147
+ - 4
148
+ - 5
149
+ - 6
150
+ - 7
151
+ - 8
152
+ - 9
153
+ - 10
154
+ version: 2.5.0
155
+ clic_edm_ttbar_pf:
156
+ splits:
157
+ - 1
158
+ - 2
159
+ - 3
160
+ - 4
161
+ - 5
162
+ - 6
163
+ - 7
164
+ - 8
165
+ - 9
166
+ - 10
167
+ version: 2.5.0
168
+ clic_edm_ww_fullhad_pf:
169
+ splits:
170
+ - 1
171
+ - 2
172
+ - 3
173
+ - 4
174
+ - 5
175
+ - 6
176
+ - 7
177
+ - 8
178
+ - 9
179
+ - 10
180
+ version: 2.5.0
181
+ val_freq: null
182
+ valid_dataset:
183
+ clic:
184
+ physical:
185
+ batch_size: 1
186
+ samples:
187
+ clic_edm_qq_pf:
188
+ splits:
189
+ - 1
190
+ - 2
191
+ - 3
192
+ - 4
193
+ - 5
194
+ - 6
195
+ - 7
196
+ - 8
197
+ - 9
198
+ - 10
199
+ version: 2.5.0
200
+ clic_edm_ttbar_pf:
201
+ splits:
202
+ - 1
203
+ - 2
204
+ - 3
205
+ - 4
206
+ - 5
207
+ - 6
208
+ - 7
209
+ - 8
210
+ - 9
211
+ - 10
212
+ version: 2.5.0
213
+ clic_edm_ww_fullhad_pf:
214
+ splits:
215
+ - 1
216
+ - 2
217
+ - 3
218
+ - 4
219
+ - 5
220
+ - 6
221
+ - 7
222
+ - 8
223
+ - 9
224
+ - 10
225
+ version: 2.5.0
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train.log ADDED
@@ -0,0 +1,811 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-02-04 08:16:15,870] INFO: Will use single-gpu: NVIDIA A100 80GB PCIe
2
+ [2025-02-04 08:16:15,870] INFO: configured dtype=torch.bfloat16 for autocast
3
+ [2025-02-04 08:16:15,870] INFO: configured dtype=torch.bfloat16 for autocast
4
+ [2025-02-04 08:16:16,478] INFO: MLPF(
5
+ (nn0_id): ModuleList(
6
+ (0-1): 2 x Sequential(
7
+ (0): Linear(in_features=17, out_features=1024, bias=True)
8
+ (1): ELU(alpha=1.0)
9
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
10
+ (3): Dropout(p=0.0, inplace=False)
11
+ (4): Linear(in_features=1024, out_features=1024, bias=True)
12
+ )
13
+ )
14
+ (nn0_reg): ModuleList(
15
+ (0-1): 2 x Sequential(
16
+ (0): Linear(in_features=17, out_features=1024, bias=True)
17
+ (1): ELU(alpha=1.0)
18
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
19
+ (3): Dropout(p=0.0, inplace=False)
20
+ (4): Linear(in_features=1024, out_features=1024, bias=True)
21
+ )
22
+ )
23
+ (conv_id): ModuleList(
24
+ (0-2): 3 x CombinedGraphLayer(
25
+ (layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
26
+ (ffn_dist): Sequential(
27
+ (0): Linear(in_features=1024, out_features=128, bias=True)
28
+ (1): ELU(alpha=1.0)
29
+ (2): Linear(in_features=128, out_features=128, bias=True)
30
+ (3): ELU(alpha=1.0)
31
+ (4): Linear(in_features=128, out_features=128, bias=True)
32
+ )
33
+ (message_building_layer): MessageBuildingLayerLSH(
34
+ (kernel): NodePairGaussianKernel()
35
+ )
36
+ (message_passing_layers): ModuleList(
37
+ (0-1): 2 x GHConvDense()
38
+ )
39
+ )
40
+ )
41
+ (conv_reg): ModuleList(
42
+ (0-2): 3 x CombinedGraphLayer(
43
+ (layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
44
+ (ffn_dist): Sequential(
45
+ (0): Linear(in_features=1024, out_features=128, bias=True)
46
+ (1): ELU(alpha=1.0)
47
+ (2): Linear(in_features=128, out_features=128, bias=True)
48
+ (3): ELU(alpha=1.0)
49
+ (4): Linear(in_features=128, out_features=128, bias=True)
50
+ )
51
+ (message_building_layer): MessageBuildingLayerLSH(
52
+ (kernel): NodePairGaussianKernel()
53
+ )
54
+ (message_passing_layers): ModuleList(
55
+ (0-1): 2 x GHConvDense()
56
+ )
57
+ )
58
+ )
59
+ (nn_binary_particle): Sequential(
60
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
61
+ (1): ELU(alpha=1.0)
62
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
63
+ (3): Dropout(p=0.0, inplace=False)
64
+ (4): Linear(in_features=1024, out_features=2, bias=True)
65
+ )
66
+ (nn_pid): Sequential(
67
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
68
+ (1): ELU(alpha=1.0)
69
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
70
+ (3): Dropout(p=0.0, inplace=False)
71
+ (4): Linear(in_features=1024, out_features=6, bias=True)
72
+ )
73
+ (nn_pu): Sequential(
74
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
75
+ (1): ELU(alpha=1.0)
76
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
77
+ (3): Dropout(p=0.0, inplace=False)
78
+ (4): Linear(in_features=1024, out_features=1, bias=True)
79
+ )
80
+ (nn_pt): RegressionOutput(
81
+ (nn): ModuleList(
82
+ (0-1): 2 x Sequential(
83
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
84
+ (1): ELU(alpha=1.0)
85
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
86
+ (3): Dropout(p=0.0, inplace=False)
87
+ (4): Linear(in_features=1024, out_features=1, bias=True)
88
+ )
89
+ )
90
+ )
91
+ (nn_eta): RegressionOutput(
92
+ (nn): Sequential(
93
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
94
+ (1): ELU(alpha=1.0)
95
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
96
+ (3): Dropout(p=0.0, inplace=False)
97
+ (4): Linear(in_features=1024, out_features=2, bias=True)
98
+ )
99
+ )
100
+ (nn_sin_phi): RegressionOutput(
101
+ (nn): Sequential(
102
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
103
+ (1): ELU(alpha=1.0)
104
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
105
+ (3): Dropout(p=0.0, inplace=False)
106
+ (4): Linear(in_features=1024, out_features=2, bias=True)
107
+ )
108
+ )
109
+ (nn_cos_phi): RegressionOutput(
110
+ (nn): Sequential(
111
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
112
+ (1): ELU(alpha=1.0)
113
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
114
+ (3): Dropout(p=0.0, inplace=False)
115
+ (4): Linear(in_features=1024, out_features=2, bias=True)
116
+ )
117
+ )
118
+ (nn_energy): RegressionOutput(
119
+ (nn): ModuleList(
120
+ (0-1): 2 x Sequential(
121
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
122
+ (1): ELU(alpha=1.0)
123
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
124
+ (3): Dropout(p=0.0, inplace=False)
125
+ (4): Linear(in_features=1024, out_features=1, bias=True)
126
+ )
127
+ )
128
+ )
129
+ )
130
+ [2025-02-04 08:16:16,478] INFO: MLPF(
131
+ (nn0_id): ModuleList(
132
+ (0-1): 2 x Sequential(
133
+ (0): Linear(in_features=17, out_features=1024, bias=True)
134
+ (1): ELU(alpha=1.0)
135
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
136
+ (3): Dropout(p=0.0, inplace=False)
137
+ (4): Linear(in_features=1024, out_features=1024, bias=True)
138
+ )
139
+ )
140
+ (nn0_reg): ModuleList(
141
+ (0-1): 2 x Sequential(
142
+ (0): Linear(in_features=17, out_features=1024, bias=True)
143
+ (1): ELU(alpha=1.0)
144
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
145
+ (3): Dropout(p=0.0, inplace=False)
146
+ (4): Linear(in_features=1024, out_features=1024, bias=True)
147
+ )
148
+ )
149
+ (conv_id): ModuleList(
150
+ (0-2): 3 x CombinedGraphLayer(
151
+ (layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
152
+ (ffn_dist): Sequential(
153
+ (0): Linear(in_features=1024, out_features=128, bias=True)
154
+ (1): ELU(alpha=1.0)
155
+ (2): Linear(in_features=128, out_features=128, bias=True)
156
+ (3): ELU(alpha=1.0)
157
+ (4): Linear(in_features=128, out_features=128, bias=True)
158
+ )
159
+ (message_building_layer): MessageBuildingLayerLSH(
160
+ (kernel): NodePairGaussianKernel()
161
+ )
162
+ (message_passing_layers): ModuleList(
163
+ (0-1): 2 x GHConvDense()
164
+ )
165
+ )
166
+ )
167
+ (conv_reg): ModuleList(
168
+ (0-2): 3 x CombinedGraphLayer(
169
+ (layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
170
+ (ffn_dist): Sequential(
171
+ (0): Linear(in_features=1024, out_features=128, bias=True)
172
+ (1): ELU(alpha=1.0)
173
+ (2): Linear(in_features=128, out_features=128, bias=True)
174
+ (3): ELU(alpha=1.0)
175
+ (4): Linear(in_features=128, out_features=128, bias=True)
176
+ )
177
+ (message_building_layer): MessageBuildingLayerLSH(
178
+ (kernel): NodePairGaussianKernel()
179
+ )
180
+ (message_passing_layers): ModuleList(
181
+ (0-1): 2 x GHConvDense()
182
+ )
183
+ )
184
+ )
185
+ (nn_binary_particle): Sequential(
186
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
187
+ (1): ELU(alpha=1.0)
188
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
189
+ (3): Dropout(p=0.0, inplace=False)
190
+ (4): Linear(in_features=1024, out_features=2, bias=True)
191
+ )
192
+ (nn_pid): Sequential(
193
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
194
+ (1): ELU(alpha=1.0)
195
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
196
+ (3): Dropout(p=0.0, inplace=False)
197
+ (4): Linear(in_features=1024, out_features=6, bias=True)
198
+ )
199
+ (nn_pu): Sequential(
200
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
201
+ (1): ELU(alpha=1.0)
202
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
203
+ (3): Dropout(p=0.0, inplace=False)
204
+ (4): Linear(in_features=1024, out_features=1, bias=True)
205
+ )
206
+ (nn_pt): RegressionOutput(
207
+ (nn): ModuleList(
208
+ (0-1): 2 x Sequential(
209
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
210
+ (1): ELU(alpha=1.0)
211
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
212
+ (3): Dropout(p=0.0, inplace=False)
213
+ (4): Linear(in_features=1024, out_features=1, bias=True)
214
+ )
215
+ )
216
+ )
217
+ (nn_eta): RegressionOutput(
218
+ (nn): Sequential(
219
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
220
+ (1): ELU(alpha=1.0)
221
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
222
+ (3): Dropout(p=0.0, inplace=False)
223
+ (4): Linear(in_features=1024, out_features=2, bias=True)
224
+ )
225
+ )
226
+ (nn_sin_phi): RegressionOutput(
227
+ (nn): Sequential(
228
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
229
+ (1): ELU(alpha=1.0)
230
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
231
+ (3): Dropout(p=0.0, inplace=False)
232
+ (4): Linear(in_features=1024, out_features=2, bias=True)
233
+ )
234
+ )
235
+ (nn_cos_phi): RegressionOutput(
236
+ (nn): Sequential(
237
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
238
+ (1): ELU(alpha=1.0)
239
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
240
+ (3): Dropout(p=0.0, inplace=False)
241
+ (4): Linear(in_features=1024, out_features=2, bias=True)
242
+ )
243
+ )
244
+ (nn_energy): RegressionOutput(
245
+ (nn): ModuleList(
246
+ (0-1): 2 x Sequential(
247
+ (0): Linear(in_features=1024, out_features=1024, bias=True)
248
+ (1): ELU(alpha=1.0)
249
+ (2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
250
+ (3): Dropout(p=0.0, inplace=False)
251
+ (4): Linear(in_features=1024, out_features=1, bias=True)
252
+ )
253
+ )
254
+ )
255
+ )
256
+ [2025-02-04 08:16:16,480] INFO: Trainable parameters: 53574931
257
+ [2025-02-04 08:16:16,480] INFO: Trainable parameters: 53574931
258
+ [2025-02-04 08:16:16,480] INFO: Non-trainable parameters: 76800
259
+ [2025-02-04 08:16:16,480] INFO: Non-trainable parameters: 76800
260
+ [2025-02-04 08:16:16,480] INFO: Total parameters: 53651731
261
+ [2025-02-04 08:16:16,480] INFO: Total parameters: 53651731
262
+ [2025-02-04 08:16:16,482] INFO: Modules Trainable parameters Non-trainable parameters
263
+ nn0_id.0.0.weight 17408 0
264
+ nn0_id.0.0.bias 1024 0
265
+ nn0_id.0.2.weight 1024 0
266
+ nn0_id.0.2.bias 1024 0
267
+ nn0_id.0.4.weight 1048576 0
268
+ nn0_id.0.4.bias 1024 0
269
+ nn0_id.1.0.weight 17408 0
270
+ nn0_id.1.0.bias 1024 0
271
+ nn0_id.1.2.weight 1024 0
272
+ nn0_id.1.2.bias 1024 0
273
+ nn0_id.1.4.weight 1048576 0
274
+ nn0_id.1.4.bias 1024 0
275
+ nn0_reg.0.0.weight 17408 0
276
+ nn0_reg.0.0.bias 1024 0
277
+ nn0_reg.0.2.weight 1024 0
278
+ nn0_reg.0.2.bias 1024 0
279
+ nn0_reg.0.4.weight 1048576 0
280
+ nn0_reg.0.4.bias 1024 0
281
+ nn0_reg.1.0.weight 17408 0
282
+ nn0_reg.1.0.bias 1024 0
283
+ nn0_reg.1.2.weight 1024 0
284
+ nn0_reg.1.2.bias 1024 0
285
+ nn0_reg.1.4.weight 1048576 0
286
+ nn0_reg.1.4.bias 1024 0
287
+ conv_id.0.layernorm1.weight 1024 0
288
+ conv_id.0.layernorm1.bias 1024 0
289
+ conv_id.0.ffn_dist.0.weight 131072 0
290
+ conv_id.0.ffn_dist.0.bias 128 0
291
+ conv_id.0.ffn_dist.2.weight 16384 0
292
+ conv_id.0.ffn_dist.2.bias 128 0
293
+ conv_id.0.ffn_dist.4.weight 16384 0
294
+ conv_id.0.ffn_dist.4.bias 128 0
295
+ conv_id.0.message_building_layer.codebook_random_rotations 0 12800
296
+ conv_id.0.message_passing_layers.0.W_t 1048576 0
297
+ conv_id.0.message_passing_layers.0.b_t 1024 0
298
+ conv_id.0.message_passing_layers.0.W_h 1048576 0
299
+ conv_id.0.message_passing_layers.0.theta 1048576 0
300
+ conv_id.0.message_passing_layers.1.W_t 1048576 0
301
+ conv_id.0.message_passing_layers.1.b_t 1024 0
302
+ conv_id.0.message_passing_layers.1.W_h 1048576 0
303
+ conv_id.0.message_passing_layers.1.theta 1048576 0
304
+ conv_id.1.layernorm1.weight 1024 0
305
+ conv_id.1.layernorm1.bias 1024 0
306
+ conv_id.1.ffn_dist.0.weight 131072 0
307
+ conv_id.1.ffn_dist.0.bias 128 0
308
+ conv_id.1.ffn_dist.2.weight 16384 0
309
+ conv_id.1.ffn_dist.2.bias 128 0
310
+ conv_id.1.ffn_dist.4.weight 16384 0
311
+ conv_id.1.ffn_dist.4.bias 128 0
312
+ conv_id.1.message_building_layer.codebook_random_rotations 0 12800
313
+ conv_id.1.message_passing_layers.0.W_t 1048576 0
314
+ conv_id.1.message_passing_layers.0.b_t 1024 0
315
+ conv_id.1.message_passing_layers.0.W_h 1048576 0
316
+ conv_id.1.message_passing_layers.0.theta 1048576 0
317
+ conv_id.1.message_passing_layers.1.W_t 1048576 0
318
+ conv_id.1.message_passing_layers.1.b_t 1024 0
319
+ conv_id.1.message_passing_layers.1.W_h 1048576 0
320
+ conv_id.1.message_passing_layers.1.theta 1048576 0
321
+ conv_id.2.layernorm1.weight 1024 0
322
+ conv_id.2.layernorm1.bias 1024 0
323
+ conv_id.2.ffn_dist.0.weight 131072 0
324
+ conv_id.2.ffn_dist.0.bias 128 0
325
+ conv_id.2.ffn_dist.2.weight 16384 0
326
+ conv_id.2.ffn_dist.2.bias 128 0
327
+ conv_id.2.ffn_dist.4.weight 16384 0
328
+ conv_id.2.ffn_dist.4.bias 128 0
329
+ conv_id.2.message_building_layer.codebook_random_rotations 0 12800
330
+ conv_id.2.message_passing_layers.0.W_t 1048576 0
331
+ conv_id.2.message_passing_layers.0.b_t 1024 0
332
+ conv_id.2.message_passing_layers.0.W_h 1048576 0
333
+ conv_id.2.message_passing_layers.0.theta 1048576 0
334
+ conv_id.2.message_passing_layers.1.W_t 1048576 0
335
+ conv_id.2.message_passing_layers.1.b_t 1024 0
336
+ conv_id.2.message_passing_layers.1.W_h 1048576 0
337
+ conv_id.2.message_passing_layers.1.theta 1048576 0
338
+ conv_reg.0.layernorm1.weight 1024 0
339
+ conv_reg.0.layernorm1.bias 1024 0
340
+ conv_reg.0.ffn_dist.0.weight 131072 0
341
+ conv_reg.0.ffn_dist.0.bias 128 0
342
+ conv_reg.0.ffn_dist.2.weight 16384 0
343
+ conv_reg.0.ffn_dist.2.bias 128 0
344
+ conv_reg.0.ffn_dist.4.weight 16384 0
345
+ conv_reg.0.ffn_dist.4.bias 128 0
346
+ conv_reg.0.message_building_layer.codebook_random_rotations 0 12800
347
+ conv_reg.0.message_passing_layers.0.W_t 1048576 0
348
+ conv_reg.0.message_passing_layers.0.b_t 1024 0
349
+ conv_reg.0.message_passing_layers.0.W_h 1048576 0
350
+ conv_reg.0.message_passing_layers.0.theta 1048576 0
351
+ conv_reg.0.message_passing_layers.1.W_t 1048576 0
352
+ conv_reg.0.message_passing_layers.1.b_t 1024 0
353
+ conv_reg.0.message_passing_layers.1.W_h 1048576 0
354
+ conv_reg.0.message_passing_layers.1.theta 1048576 0
355
+ conv_reg.1.layernorm1.weight 1024 0
356
+ conv_reg.1.layernorm1.bias 1024 0
357
+ conv_reg.1.ffn_dist.0.weight 131072 0
358
+ conv_reg.1.ffn_dist.0.bias 128 0
359
+ conv_reg.1.ffn_dist.2.weight 16384 0
360
+ conv_reg.1.ffn_dist.2.bias 128 0
361
+ conv_reg.1.ffn_dist.4.weight 16384 0
362
+ conv_reg.1.ffn_dist.4.bias 128 0
363
+ conv_reg.1.message_building_layer.codebook_random_rotations 0 12800
364
+ conv_reg.1.message_passing_layers.0.W_t 1048576 0
365
+ conv_reg.1.message_passing_layers.0.b_t 1024 0
366
+ conv_reg.1.message_passing_layers.0.W_h 1048576 0
367
+ conv_reg.1.message_passing_layers.0.theta 1048576 0
368
+ conv_reg.1.message_passing_layers.1.W_t 1048576 0
369
+ conv_reg.1.message_passing_layers.1.b_t 1024 0
370
+ conv_reg.1.message_passing_layers.1.W_h 1048576 0
371
+ conv_reg.1.message_passing_layers.1.theta 1048576 0
372
+ conv_reg.2.layernorm1.weight 1024 0
373
+ conv_reg.2.layernorm1.bias 1024 0
374
+ conv_reg.2.ffn_dist.0.weight 131072 0
375
+ conv_reg.2.ffn_dist.0.bias 128 0
376
+ conv_reg.2.ffn_dist.2.weight 16384 0
377
+ conv_reg.2.ffn_dist.2.bias 128 0
378
+ conv_reg.2.ffn_dist.4.weight 16384 0
379
+ conv_reg.2.ffn_dist.4.bias 128 0
380
+ conv_reg.2.message_building_layer.codebook_random_rotations 0 12800
381
+ conv_reg.2.message_passing_layers.0.W_t 1048576 0
382
+ conv_reg.2.message_passing_layers.0.b_t 1024 0
383
+ conv_reg.2.message_passing_layers.0.W_h 1048576 0
384
+ conv_reg.2.message_passing_layers.0.theta 1048576 0
385
+ conv_reg.2.message_passing_layers.1.W_t 1048576 0
386
+ conv_reg.2.message_passing_layers.1.b_t 1024 0
387
+ conv_reg.2.message_passing_layers.1.W_h 1048576 0
388
+ conv_reg.2.message_passing_layers.1.theta 1048576 0
389
+ nn_binary_particle.0.weight 1048576 0
390
+ nn_binary_particle.0.bias 1024 0
391
+ nn_binary_particle.2.weight 1024 0
392
+ nn_binary_particle.2.bias 1024 0
393
+ nn_binary_particle.4.weight 2048 0
394
+ nn_binary_particle.4.bias 2 0
395
+ nn_pid.0.weight 1048576 0
396
+ nn_pid.0.bias 1024 0
397
+ nn_pid.2.weight 1024 0
398
+ nn_pid.2.bias 1024 0
399
+ nn_pid.4.weight 6144 0
400
+ nn_pid.4.bias 6 0
401
+ nn_pu.0.weight 1048576 0
402
+ nn_pu.0.bias 1024 0
403
+ nn_pu.2.weight 1024 0
404
+ nn_pu.2.bias 1024 0
405
+ nn_pu.4.weight 1024 0
406
+ nn_pu.4.bias 1 0
407
+ nn_pt.nn.0.0.weight 1048576 0
408
+ nn_pt.nn.0.0.bias 1024 0
409
+ nn_pt.nn.0.2.weight 1024 0
410
+ nn_pt.nn.0.2.bias 1024 0
411
+ nn_pt.nn.0.4.weight 1024 0
412
+ nn_pt.nn.0.4.bias 1 0
413
+ nn_pt.nn.1.0.weight 1048576 0
414
+ nn_pt.nn.1.0.bias 1024 0
415
+ nn_pt.nn.1.2.weight 1024 0
416
+ nn_pt.nn.1.2.bias 1024 0
417
+ nn_pt.nn.1.4.weight 1024 0
418
+ nn_pt.nn.1.4.bias 1 0
419
+ nn_eta.nn.0.weight 1048576 0
420
+ nn_eta.nn.0.bias 1024 0
421
+ nn_eta.nn.2.weight 1024 0
422
+ nn_eta.nn.2.bias 1024 0
423
+ nn_eta.nn.4.weight 2048 0
424
+ nn_eta.nn.4.bias 2 0
425
+ nn_sin_phi.nn.0.weight 1048576 0
426
+ nn_sin_phi.nn.0.bias 1024 0
427
+ nn_sin_phi.nn.2.weight 1024 0
428
+ nn_sin_phi.nn.2.bias 1024 0
429
+ nn_sin_phi.nn.4.weight 2048 0
430
+ nn_sin_phi.nn.4.bias 2 0
431
+ nn_cos_phi.nn.0.weight 1048576 0
432
+ nn_cos_phi.nn.0.bias 1024 0
433
+ nn_cos_phi.nn.2.weight 1024 0
434
+ nn_cos_phi.nn.2.bias 1024 0
435
+ nn_cos_phi.nn.4.weight 2048 0
436
+ nn_cos_phi.nn.4.bias 2 0
437
+ nn_energy.nn.0.0.weight 1048576 0
438
+ nn_energy.nn.0.0.bias 1024 0
439
+ nn_energy.nn.0.2.weight 1024 0
440
+ nn_energy.nn.0.2.bias 1024 0
441
+ nn_energy.nn.0.4.weight 1024 0
442
+ nn_energy.nn.0.4.bias 1 0
443
+ nn_energy.nn.1.0.weight 1048576 0
444
+ nn_energy.nn.1.0.bias 1024 0
445
+ nn_energy.nn.1.2.weight 1024 0
446
+ nn_energy.nn.1.2.bias 1024 0
447
+ nn_energy.nn.1.4.weight 1024 0
448
+ nn_energy.nn.1.4.bias 1 0
449
+ [2025-02-04 08:16:16,482] INFO: Modules Trainable parameters Non-trainable parameters
450
+ nn0_id.0.0.weight 17408 0
451
+ nn0_id.0.0.bias 1024 0
452
+ nn0_id.0.2.weight 1024 0
453
+ nn0_id.0.2.bias 1024 0
454
+ nn0_id.0.4.weight 1048576 0
455
+ nn0_id.0.4.bias 1024 0
456
+ nn0_id.1.0.weight 17408 0
457
+ nn0_id.1.0.bias 1024 0
458
+ nn0_id.1.2.weight 1024 0
459
+ nn0_id.1.2.bias 1024 0
460
+ nn0_id.1.4.weight 1048576 0
461
+ nn0_id.1.4.bias 1024 0
462
+ nn0_reg.0.0.weight 17408 0
463
+ nn0_reg.0.0.bias 1024 0
464
+ nn0_reg.0.2.weight 1024 0
465
+ nn0_reg.0.2.bias 1024 0
466
+ nn0_reg.0.4.weight 1048576 0
467
+ nn0_reg.0.4.bias 1024 0
468
+ nn0_reg.1.0.weight 17408 0
469
+ nn0_reg.1.0.bias 1024 0
470
+ nn0_reg.1.2.weight 1024 0
471
+ nn0_reg.1.2.bias 1024 0
472
+ nn0_reg.1.4.weight 1048576 0
473
+ nn0_reg.1.4.bias 1024 0
474
+ conv_id.0.layernorm1.weight 1024 0
475
+ conv_id.0.layernorm1.bias 1024 0
476
+ conv_id.0.ffn_dist.0.weight 131072 0
477
+ conv_id.0.ffn_dist.0.bias 128 0
478
+ conv_id.0.ffn_dist.2.weight 16384 0
479
+ conv_id.0.ffn_dist.2.bias 128 0
480
+ conv_id.0.ffn_dist.4.weight 16384 0
481
+ conv_id.0.ffn_dist.4.bias 128 0
482
+ conv_id.0.message_building_layer.codebook_random_rotations 0 12800
483
+ conv_id.0.message_passing_layers.0.W_t 1048576 0
484
+ conv_id.0.message_passing_layers.0.b_t 1024 0
485
+ conv_id.0.message_passing_layers.0.W_h 1048576 0
486
+ conv_id.0.message_passing_layers.0.theta 1048576 0
487
+ conv_id.0.message_passing_layers.1.W_t 1048576 0
488
+ conv_id.0.message_passing_layers.1.b_t 1024 0
489
+ conv_id.0.message_passing_layers.1.W_h 1048576 0
490
+ conv_id.0.message_passing_layers.1.theta 1048576 0
491
+ conv_id.1.layernorm1.weight 1024 0
492
+ conv_id.1.layernorm1.bias 1024 0
493
+ conv_id.1.ffn_dist.0.weight 131072 0
494
+ conv_id.1.ffn_dist.0.bias 128 0
495
+ conv_id.1.ffn_dist.2.weight 16384 0
496
+ conv_id.1.ffn_dist.2.bias 128 0
497
+ conv_id.1.ffn_dist.4.weight 16384 0
498
+ conv_id.1.ffn_dist.4.bias 128 0
499
+ conv_id.1.message_building_layer.codebook_random_rotations 0 12800
500
+ conv_id.1.message_passing_layers.0.W_t 1048576 0
501
+ conv_id.1.message_passing_layers.0.b_t 1024 0
502
+ conv_id.1.message_passing_layers.0.W_h 1048576 0
503
+ conv_id.1.message_passing_layers.0.theta 1048576 0
504
+ conv_id.1.message_passing_layers.1.W_t 1048576 0
505
+ conv_id.1.message_passing_layers.1.b_t 1024 0
506
+ conv_id.1.message_passing_layers.1.W_h 1048576 0
507
+ conv_id.1.message_passing_layers.1.theta 1048576 0
508
+ conv_id.2.layernorm1.weight 1024 0
509
+ conv_id.2.layernorm1.bias 1024 0
510
+ conv_id.2.ffn_dist.0.weight 131072 0
511
+ conv_id.2.ffn_dist.0.bias 128 0
512
+ conv_id.2.ffn_dist.2.weight 16384 0
513
+ conv_id.2.ffn_dist.2.bias 128 0
514
+ conv_id.2.ffn_dist.4.weight 16384 0
515
+ conv_id.2.ffn_dist.4.bias 128 0
516
+ conv_id.2.message_building_layer.codebook_random_rotations 0 12800
517
+ conv_id.2.message_passing_layers.0.W_t 1048576 0
518
+ conv_id.2.message_passing_layers.0.b_t 1024 0
519
+ conv_id.2.message_passing_layers.0.W_h 1048576 0
520
+ conv_id.2.message_passing_layers.0.theta 1048576 0
521
+ conv_id.2.message_passing_layers.1.W_t 1048576 0
522
+ conv_id.2.message_passing_layers.1.b_t 1024 0
523
+ conv_id.2.message_passing_layers.1.W_h 1048576 0
524
+ conv_id.2.message_passing_layers.1.theta 1048576 0
525
+ conv_reg.0.layernorm1.weight 1024 0
526
+ conv_reg.0.layernorm1.bias 1024 0
527
+ conv_reg.0.ffn_dist.0.weight 131072 0
528
+ conv_reg.0.ffn_dist.0.bias 128 0
529
+ conv_reg.0.ffn_dist.2.weight 16384 0
530
+ conv_reg.0.ffn_dist.2.bias 128 0
531
+ conv_reg.0.ffn_dist.4.weight 16384 0
532
+ conv_reg.0.ffn_dist.4.bias 128 0
533
+ conv_reg.0.message_building_layer.codebook_random_rotations 0 12800
534
+ conv_reg.0.message_passing_layers.0.W_t 1048576 0
535
+ conv_reg.0.message_passing_layers.0.b_t 1024 0
536
+ conv_reg.0.message_passing_layers.0.W_h 1048576 0
537
+ conv_reg.0.message_passing_layers.0.theta 1048576 0
538
+ conv_reg.0.message_passing_layers.1.W_t 1048576 0
539
+ conv_reg.0.message_passing_layers.1.b_t 1024 0
540
+ conv_reg.0.message_passing_layers.1.W_h 1048576 0
541
+ conv_reg.0.message_passing_layers.1.theta 1048576 0
542
+ conv_reg.1.layernorm1.weight 1024 0
543
+ conv_reg.1.layernorm1.bias 1024 0
544
+ conv_reg.1.ffn_dist.0.weight 131072 0
545
+ conv_reg.1.ffn_dist.0.bias 128 0
546
+ conv_reg.1.ffn_dist.2.weight 16384 0
547
+ conv_reg.1.ffn_dist.2.bias 128 0
548
+ conv_reg.1.ffn_dist.4.weight 16384 0
549
+ conv_reg.1.ffn_dist.4.bias 128 0
550
+ conv_reg.1.message_building_layer.codebook_random_rotations 0 12800
551
+ conv_reg.1.message_passing_layers.0.W_t 1048576 0
552
+ conv_reg.1.message_passing_layers.0.b_t 1024 0
553
+ conv_reg.1.message_passing_layers.0.W_h 1048576 0
554
+ conv_reg.1.message_passing_layers.0.theta 1048576 0
555
+ conv_reg.1.message_passing_layers.1.W_t 1048576 0
556
+ conv_reg.1.message_passing_layers.1.b_t 1024 0
557
+ conv_reg.1.message_passing_layers.1.W_h 1048576 0
558
+ conv_reg.1.message_passing_layers.1.theta 1048576 0
559
+ conv_reg.2.layernorm1.weight 1024 0
560
+ conv_reg.2.layernorm1.bias 1024 0
561
+ conv_reg.2.ffn_dist.0.weight 131072 0
562
+ conv_reg.2.ffn_dist.0.bias 128 0
563
+ conv_reg.2.ffn_dist.2.weight 16384 0
564
+ conv_reg.2.ffn_dist.2.bias 128 0
565
+ conv_reg.2.ffn_dist.4.weight 16384 0
566
+ conv_reg.2.ffn_dist.4.bias 128 0
567
+ conv_reg.2.message_building_layer.codebook_random_rotations 0 12800
568
+ conv_reg.2.message_passing_layers.0.W_t 1048576 0
569
+ conv_reg.2.message_passing_layers.0.b_t 1024 0
570
+ conv_reg.2.message_passing_layers.0.W_h 1048576 0
571
+ conv_reg.2.message_passing_layers.0.theta 1048576 0
572
+ conv_reg.2.message_passing_layers.1.W_t 1048576 0
573
+ conv_reg.2.message_passing_layers.1.b_t 1024 0
574
+ conv_reg.2.message_passing_layers.1.W_h 1048576 0
575
+ conv_reg.2.message_passing_layers.1.theta 1048576 0
576
+ nn_binary_particle.0.weight 1048576 0
577
+ nn_binary_particle.0.bias 1024 0
578
+ nn_binary_particle.2.weight 1024 0
579
+ nn_binary_particle.2.bias 1024 0
580
+ nn_binary_particle.4.weight 2048 0
581
+ nn_binary_particle.4.bias 2 0
582
+ nn_pid.0.weight 1048576 0
583
+ nn_pid.0.bias 1024 0
584
+ nn_pid.2.weight 1024 0
585
+ nn_pid.2.bias 1024 0
586
+ nn_pid.4.weight 6144 0
587
+ nn_pid.4.bias 6 0
588
+ nn_pu.0.weight 1048576 0
589
+ nn_pu.0.bias 1024 0
590
+ nn_pu.2.weight 1024 0
591
+ nn_pu.2.bias 1024 0
592
+ nn_pu.4.weight 1024 0
593
+ nn_pu.4.bias 1 0
594
+ nn_pt.nn.0.0.weight 1048576 0
595
+ nn_pt.nn.0.0.bias 1024 0
596
+ nn_pt.nn.0.2.weight 1024 0
597
+ nn_pt.nn.0.2.bias 1024 0
598
+ nn_pt.nn.0.4.weight 1024 0
599
+ nn_pt.nn.0.4.bias 1 0
600
+ nn_pt.nn.1.0.weight 1048576 0
601
+ nn_pt.nn.1.0.bias 1024 0
602
+ nn_pt.nn.1.2.weight 1024 0
603
+ nn_pt.nn.1.2.bias 1024 0
604
+ nn_pt.nn.1.4.weight 1024 0
605
+ nn_pt.nn.1.4.bias 1 0
606
+ nn_eta.nn.0.weight 1048576 0
607
+ nn_eta.nn.0.bias 1024 0
608
+ nn_eta.nn.2.weight 1024 0
609
+ nn_eta.nn.2.bias 1024 0
610
+ nn_eta.nn.4.weight 2048 0
611
+ nn_eta.nn.4.bias 2 0
612
+ nn_sin_phi.nn.0.weight 1048576 0
613
+ nn_sin_phi.nn.0.bias 1024 0
614
+ nn_sin_phi.nn.2.weight 1024 0
615
+ nn_sin_phi.nn.2.bias 1024 0
616
+ nn_sin_phi.nn.4.weight 2048 0
617
+ nn_sin_phi.nn.4.bias 2 0
618
+ nn_cos_phi.nn.0.weight 1048576 0
619
+ nn_cos_phi.nn.0.bias 1024 0
620
+ nn_cos_phi.nn.2.weight 1024 0
621
+ nn_cos_phi.nn.2.bias 1024 0
622
+ nn_cos_phi.nn.4.weight 2048 0
623
+ nn_cos_phi.nn.4.bias 2 0
624
+ nn_energy.nn.0.0.weight 1048576 0
625
+ nn_energy.nn.0.0.bias 1024 0
626
+ nn_energy.nn.0.2.weight 1024 0
627
+ nn_energy.nn.0.2.bias 1024 0
628
+ nn_energy.nn.0.4.weight 1024 0
629
+ nn_energy.nn.0.4.bias 1 0
630
+ nn_energy.nn.1.0.weight 1048576 0
631
+ nn_energy.nn.1.0.bias 1024 0
632
+ nn_energy.nn.1.2.weight 1024 0
633
+ nn_energy.nn.1.2.bias 1024 0
634
+ nn_energy.nn.1.4.weight 1024 0
635
+ nn_energy.nn.1.4.bias 1 0
636
+ [2025-02-04 08:16:16,483] INFO: Creating experiment dir experiments/pyg-clic_20250204_081614_352844
637
+ [2025-02-04 08:16:16,483] INFO: Creating experiment dir experiments/pyg-clic_20250204_081614_352844
638
+ [2025-02-04 08:16:16,483] INFO: Model directory experiments/pyg-clic_20250204_081614_352844
639
+ [2025-02-04 08:16:16,483] INFO: Model directory experiments/pyg-clic_20250204_081614_352844
640
+ [2025-02-04 08:16:21,637] INFO: train_dataset: clic_edm_qq_pf, 719492
641
+ [2025-02-04 08:16:21,637] INFO: train_dataset: clic_edm_qq_pf, 719492
642
+ [2025-02-04 08:16:21,650] INFO: train_dataset: clic_edm_qq_pf, 719490
643
+ [2025-02-04 08:16:21,650] INFO: train_dataset: clic_edm_qq_pf, 719490
644
+ [2025-02-04 08:16:21,662] INFO: train_dataset: clic_edm_qq_pf, 719489
645
+ [2025-02-04 08:16:21,662] INFO: train_dataset: clic_edm_qq_pf, 719489
646
+ [2025-02-04 08:16:21,674] INFO: train_dataset: clic_edm_qq_pf, 719515
647
+ [2025-02-04 08:16:21,674] INFO: train_dataset: clic_edm_qq_pf, 719515
648
+ [2025-02-04 08:16:21,688] INFO: train_dataset: clic_edm_qq_pf, 719510
649
+ [2025-02-04 08:16:21,688] INFO: train_dataset: clic_edm_qq_pf, 719510
650
+ [2025-02-04 08:16:21,700] INFO: train_dataset: clic_edm_qq_pf, 719503
651
+ [2025-02-04 08:16:21,700] INFO: train_dataset: clic_edm_qq_pf, 719503
652
+ [2025-02-04 08:16:21,712] INFO: train_dataset: clic_edm_qq_pf, 719509
653
+ [2025-02-04 08:16:21,712] INFO: train_dataset: clic_edm_qq_pf, 719509
654
+ [2025-02-04 08:16:21,724] INFO: train_dataset: clic_edm_qq_pf, 719484
655
+ [2025-02-04 08:16:21,724] INFO: train_dataset: clic_edm_qq_pf, 719484
656
+ [2025-02-04 08:16:21,736] INFO: train_dataset: clic_edm_qq_pf, 719474
657
+ [2025-02-04 08:16:21,736] INFO: train_dataset: clic_edm_qq_pf, 719474
658
+ [2025-02-04 08:16:21,748] INFO: train_dataset: clic_edm_qq_pf, 720386
659
+ [2025-02-04 08:16:21,748] INFO: train_dataset: clic_edm_qq_pf, 720386
660
+ [2025-02-04 08:16:21,767] INFO: train_dataset: clic_edm_ttbar_pf, 713900
661
+ [2025-02-04 08:16:21,767] INFO: train_dataset: clic_edm_ttbar_pf, 713900
662
+ [2025-02-04 08:16:21,786] INFO: train_dataset: clic_edm_ttbar_pf, 713900
663
+ [2025-02-04 08:16:21,786] INFO: train_dataset: clic_edm_ttbar_pf, 713900
664
+ [2025-02-04 08:16:21,807] INFO: train_dataset: clic_edm_ttbar_pf, 713900
665
+ [2025-02-04 08:16:21,807] INFO: train_dataset: clic_edm_ttbar_pf, 713900
666
+ [2025-02-04 08:16:21,827] INFO: train_dataset: clic_edm_ttbar_pf, 713900
667
+ [2025-02-04 08:16:21,827] INFO: train_dataset: clic_edm_ttbar_pf, 713900
668
+ [2025-02-04 08:16:21,846] INFO: train_dataset: clic_edm_ttbar_pf, 713900
669
+ [2025-02-04 08:16:21,846] INFO: train_dataset: clic_edm_ttbar_pf, 713900
670
+ [2025-02-04 08:16:21,865] INFO: train_dataset: clic_edm_ttbar_pf, 713900
671
+ [2025-02-04 08:16:21,865] INFO: train_dataset: clic_edm_ttbar_pf, 713900
672
+ [2025-02-04 08:16:21,884] INFO: train_dataset: clic_edm_ttbar_pf, 713900
673
+ [2025-02-04 08:16:21,884] INFO: train_dataset: clic_edm_ttbar_pf, 713900
674
+ [2025-02-04 08:16:21,903] INFO: train_dataset: clic_edm_ttbar_pf, 713900
675
+ [2025-02-04 08:16:21,903] INFO: train_dataset: clic_edm_ttbar_pf, 713900
676
+ [2025-02-04 08:16:21,925] INFO: train_dataset: clic_edm_ttbar_pf, 713900
677
+ [2025-02-04 08:16:21,925] INFO: train_dataset: clic_edm_ttbar_pf, 713900
678
+ [2025-02-04 08:16:21,944] INFO: train_dataset: clic_edm_ttbar_pf, 714700
679
+ [2025-02-04 08:16:21,944] INFO: train_dataset: clic_edm_ttbar_pf, 714700
680
+ [2025-02-04 08:16:21,956] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
681
+ [2025-02-04 08:16:21,956] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
682
+ [2025-02-04 08:16:21,968] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
683
+ [2025-02-04 08:16:21,968] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
684
+ [2025-02-04 08:16:21,980] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
685
+ [2025-02-04 08:16:21,980] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
686
+ [2025-02-04 08:16:21,992] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
687
+ [2025-02-04 08:16:21,992] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
688
+ [2025-02-04 08:16:22,005] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
689
+ [2025-02-04 08:16:22,005] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
690
+ [2025-02-04 08:16:22,016] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
691
+ [2025-02-04 08:16:22,016] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
692
+ [2025-02-04 08:16:22,028] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
693
+ [2025-02-04 08:16:22,028] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
694
+ [2025-02-04 08:16:22,040] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
695
+ [2025-02-04 08:16:22,040] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
696
+ [2025-02-04 08:16:22,053] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
697
+ [2025-02-04 08:16:22,053] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720000
698
+ [2025-02-04 08:16:22,065] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720700
699
+ [2025-02-04 08:16:22,065] INFO: train_dataset: clic_edm_ww_fullhad_pf, 720700
700
+ [2025-02-04 08:16:23,297] INFO: valid_dataset: clic_edm_qq_pf, 79948
701
+ [2025-02-04 08:16:23,297] INFO: valid_dataset: clic_edm_qq_pf, 79948
702
+ [2025-02-04 08:16:23,302] INFO: valid_dataset: clic_edm_qq_pf, 79950
703
+ [2025-02-04 08:16:23,302] INFO: valid_dataset: clic_edm_qq_pf, 79950
704
+ [2025-02-04 08:16:23,308] INFO: valid_dataset: clic_edm_qq_pf, 79939
705
+ [2025-02-04 08:16:23,308] INFO: valid_dataset: clic_edm_qq_pf, 79939
706
+ [2025-02-04 08:16:23,313] INFO: valid_dataset: clic_edm_qq_pf, 79939
707
+ [2025-02-04 08:16:23,313] INFO: valid_dataset: clic_edm_qq_pf, 79939
708
+ [2025-02-04 08:16:23,318] INFO: valid_dataset: clic_edm_qq_pf, 79950
709
+ [2025-02-04 08:16:23,318] INFO: valid_dataset: clic_edm_qq_pf, 79950
710
+ [2025-02-04 08:16:23,323] INFO: valid_dataset: clic_edm_qq_pf, 79950
711
+ [2025-02-04 08:16:23,323] INFO: valid_dataset: clic_edm_qq_pf, 79950
712
+ [2025-02-04 08:16:23,327] INFO: valid_dataset: clic_edm_qq_pf, 79938
713
+ [2025-02-04 08:16:23,327] INFO: valid_dataset: clic_edm_qq_pf, 79938
714
+ [2025-02-04 08:16:23,332] INFO: valid_dataset: clic_edm_qq_pf, 79957
715
+ [2025-02-04 08:16:23,332] INFO: valid_dataset: clic_edm_qq_pf, 79957
716
+ [2025-02-04 08:16:23,337] INFO: valid_dataset: clic_edm_qq_pf, 79955
717
+ [2025-02-04 08:16:23,337] INFO: valid_dataset: clic_edm_qq_pf, 79955
718
+ [2025-02-04 08:16:23,342] INFO: valid_dataset: clic_edm_qq_pf, 80035
719
+ [2025-02-04 08:16:23,342] INFO: valid_dataset: clic_edm_qq_pf, 80035
720
+ [2025-02-04 08:16:23,348] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
721
+ [2025-02-04 08:16:23,348] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
722
+ [2025-02-04 08:16:23,355] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
723
+ [2025-02-04 08:16:23,355] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
724
+ [2025-02-04 08:16:23,362] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
725
+ [2025-02-04 08:16:23,362] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
726
+ [2025-02-04 08:16:23,502] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
727
+ [2025-02-04 08:16:23,502] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
728
+ [2025-02-04 08:16:23,511] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
729
+ [2025-02-04 08:16:23,511] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
730
+ [2025-02-04 08:16:23,518] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
731
+ [2025-02-04 08:16:23,518] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
732
+ [2025-02-04 08:16:23,525] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
733
+ [2025-02-04 08:16:23,525] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
734
+ [2025-02-04 08:16:23,533] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
735
+ [2025-02-04 08:16:23,533] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
736
+ [2025-02-04 08:16:23,539] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
737
+ [2025-02-04 08:16:23,539] INFO: valid_dataset: clic_edm_ttbar_pf, 79300
738
+ [2025-02-04 08:16:23,545] INFO: valid_dataset: clic_edm_ttbar_pf, 79700
739
+ [2025-02-04 08:16:23,545] INFO: valid_dataset: clic_edm_ttbar_pf, 79700
740
+ [2025-02-04 08:16:23,552] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
741
+ [2025-02-04 08:16:23,552] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
742
+ [2025-02-04 08:16:23,558] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
743
+ [2025-02-04 08:16:23,558] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
744
+ [2025-02-04 08:16:23,565] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
745
+ [2025-02-04 08:16:23,565] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
746
+ [2025-02-04 08:16:23,570] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
747
+ [2025-02-04 08:16:23,570] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
748
+ [2025-02-04 08:16:23,575] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
749
+ [2025-02-04 08:16:23,575] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
750
+ [2025-02-04 08:16:23,581] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
751
+ [2025-02-04 08:16:23,581] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
752
+ [2025-02-04 08:16:23,586] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
753
+ [2025-02-04 08:16:23,586] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
754
+ [2025-02-04 08:16:23,592] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
755
+ [2025-02-04 08:16:23,592] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
756
+ [2025-02-04 08:16:23,597] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
757
+ [2025-02-04 08:16:23,597] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80000
758
+ [2025-02-04 08:16:23,603] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80100
759
+ [2025-02-04 08:16:23,603] INFO: valid_dataset: clic_edm_ww_fullhad_pf, 80100
760
+ [2025-02-06 06:49:31,898] INFO: Rank 0: epoch=1/10 train_loss=5.1229 valid_loss=4.8985 stale=0 epoch_train_time=2768.98m epoch_valid_time=24.13m epoch_total_time=2793.11m eta=25138.2m
761
+ [2025-02-06 06:49:31,898] INFO: Rank 0: epoch=1/10 train_loss=5.1229 valid_loss=4.8985 stale=0 epoch_train_time=2768.98m epoch_valid_time=24.13m epoch_total_time=2793.11m eta=25138.2m
762
+ [2025-02-06 06:49:31,908] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
763
+ [2025-02-06 06:49:31,908] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
764
+ [2025-02-06 06:49:32,016] INFO: test_dataset: clic_edm_qq_pf, 2000
765
+ [2025-02-06 06:49:32,016] INFO: test_dataset: clic_edm_qq_pf, 2000
766
+ [2025-02-06 06:49:32,027] INFO: Running predictions on clic_edm_qq_pf
767
+ [2025-02-06 06:49:32,027] INFO: Running predictions on clic_edm_qq_pf
768
+ [2025-02-06 06:49:35,361] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_0.parquet
769
+ [2025-02-06 06:49:35,361] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_0.parquet
770
+ [2025-02-06 06:49:35,858] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_1.parquet
771
+ [2025-02-06 06:49:35,858] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_1.parquet
772
+ [2025-02-06 06:49:36,326] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_2.parquet
773
+ [2025-02-06 06:49:36,326] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_2.parquet
774
+ [2025-02-06 06:49:36,796] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_3.parquet
775
+ [2025-02-06 06:49:36,796] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_3.parquet
776
+ [2025-02-06 06:49:37,321] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_4.parquet
777
+ [2025-02-06 06:49:37,321] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_4.parquet
778
+ [2025-02-06 06:49:37,832] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_5.parquet
779
+ [2025-02-06 06:49:37,832] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_5.parquet
780
+ [2025-02-06 06:49:38,305] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_6.parquet
781
+ [2025-02-06 06:49:38,305] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_6.parquet
782
+ [2025-02-06 06:49:38,752] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_7.parquet
783
+ [2025-02-06 06:49:38,752] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_7.parquet
784
+ [2025-02-06 06:49:38,828] INFO: Time taken to make predictions on device 0 is: 0.11 min
785
+ [2025-02-06 06:49:38,828] INFO: Time taken to make predictions on device 0 is: 0.11 min
786
+ [2025-02-08 05:24:19,735] INFO: Rank 0: epoch=2/10 train_loss=4.8377 valid_loss=5.2062 stale=1 epoch_train_time=2769.77m epoch_valid_time=24.22m epoch_total_time=2793.99m eta=22351.7m
787
+ [2025-02-08 05:24:19,735] INFO: Rank 0: epoch=2/10 train_loss=4.8377 valid_loss=5.2062 stale=1 epoch_train_time=2769.77m epoch_valid_time=24.22m epoch_total_time=2793.99m eta=22351.7m
788
+ [2025-02-08 05:24:19,746] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
789
+ [2025-02-08 05:24:19,746] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
790
+ [2025-02-08 05:24:19,848] INFO: test_dataset: clic_edm_qq_pf, 2000
791
+ [2025-02-08 05:24:19,848] INFO: test_dataset: clic_edm_qq_pf, 2000
792
+ [2025-02-08 05:24:19,852] INFO: Running predictions on clic_edm_qq_pf
793
+ [2025-02-08 05:24:19,852] INFO: Running predictions on clic_edm_qq_pf
794
+ [2025-02-08 05:24:21,022] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet
795
+ [2025-02-08 05:24:21,022] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet
796
+ [2025-02-08 05:24:21,482] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet
797
+ [2025-02-08 05:24:21,482] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet
798
+ [2025-02-08 05:24:21,958] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet
799
+ [2025-02-08 05:24:21,958] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet
800
+ [2025-02-08 05:24:22,535] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet
801
+ [2025-02-08 05:24:22,535] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet
802
+ [2025-02-08 05:24:23,071] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet
803
+ [2025-02-08 05:24:23,071] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet
804
+ [2025-02-08 05:24:23,545] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet
805
+ [2025-02-08 05:24:23,545] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet
806
+ [2025-02-08 05:24:24,031] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet
807
+ [2025-02-08 05:24:24,031] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet
808
+ [2025-02-08 05:24:24,463] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet
809
+ [2025-02-08 05:24:24,463] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet
810
+ [2025-02-08 05:24:24,538] INFO: Time taken to make predictions on device 0 is: 0.07 min
811
+ [2025-02-08 05:24:24,538] INFO: Time taken to make predictions on device 0 is: 0.07 min