Joosep Pata
commited on
Commit
·
b278362
1
Parent(s):
8cd2762
added gnnlsh training
Browse files- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/.gitattributes +4 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch1.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch2.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/checkpoints/checkpoint-02-5.206203.pth +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_1.json +1 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_2.json +1 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/hyperparameters.json +1 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/model_kwargs.pkl +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/overridden_config.yaml +225 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/train/events.out.tfevents.1738649783.gpu1.local.3067131.0 +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/valid/events.out.tfevents.1738649783.gpu1.local.3067131.1 +3 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train-config.yaml +225 -0
- clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train.log +811 -0
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/.gitattributes
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
plots_checkpoint*/** filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
preds_checkpoint*/** filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
runs/** filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
checkpoints/** filter=lfs diff=lfs merge=lfs -text
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch1.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c224dffffac28a746c270f295e120fc532d4312d69f247eba33436a4db4eb6fd
|
| 3 |
+
size 2531094
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/batch0_epoch2.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48d0d88b9b1f2c78af292b318dc9e592fbe600ab2b1100cbf86b9301a1ea0341
|
| 3 |
+
size 2537425
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/checkpoints/checkpoint-02-5.206203.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d0afdd229f9b76f39ad18bb1bbf8e11e8dafe292580a59340392f4c5b75eb84
|
| 3 |
+
size 643435404
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_1.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"train": {"Regression_pt": 0.25910334646839267, "Regression_eta": 0.0008618684659893994, "Regression_sin_phi": 0.0006182023768865629, "Regression_cos_phi": 0.0006280264262564951, "Regression_energy": 0.24251854639915127, "Classification_binary": 4.559183174642798, "Classification": 0.05994237113451846, "ispu": 8.946376891725996e-06, "MET": 21.453700401778285, "Sliced_Wasserstein_Loss": 65.35367781660842, "Total": 5.122855523262725}, "valid": {"Regression_pt": 0.2624917096143816, "Regression_eta": 0.000599030926991009, "Regression_sin_phi": 0.0005111121437543724, "Regression_cos_phi": 0.0004291858879196283, "Regression_energy": 0.25430431388413394, "Classification_binary": 4.326086829330267, "Classification": 0.05409432865533393, "ispu": 0.0, "MET": 15.170460776184871, "Sliced_Wasserstein_Loss": 61.53729137691238, "Total": 4.898515232160051}, "epoch_train_time": 166139.00810337067, "epoch_valid_time": 1447.8257393836975, "epoch_total_time": 167586.83384537697}
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/history/epoch_2.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"train": {"Regression_pt": 0.25380500213221835, "Regression_eta": 0.0005493453817338307, "Regression_sin_phi": 0.00038346989650266973, "Regression_cos_phi": 0.0004457221137636458, "Regression_energy": 0.2443389649528683, "Classification_binary": 4.283395962603714, "Classification": 0.05482413586651273, "ispu": 0.0, "MET": 18.887216793856833, "Sliced_Wasserstein_Loss": 62.04546156955043, "Total": 4.837748213988541}, "valid": {"Regression_pt": 0.26054830113438804, "Regression_eta": 0.0005653613874003429, "Regression_sin_phi": 0.00036033755561891466, "Regression_cos_phi": 0.0005135214289229248, "Regression_energy": 0.2509168529173799, "Classification_binary": 4.6393808675243395, "Classification": 0.05391483423198954, "ispu": 0.0, "MET": 22.03946286241575, "Sliced_Wasserstein_Loss": 63.26808066759388, "Total": 5.206203360703969}, "epoch_train_time": 166185.94912409782, "epoch_valid_time": 1453.421977519989, "epoch_total_time": 167639.3711025715}
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/hyperparameters.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"num_mlpf_params": 53574931, "train": true, "test": null, "make_plots": null, "comet": true, "save_attention": true, "dataset": "clic", "sort_data": false, "data_dir": "/scratch/persistent/joosep/tensorflow_datasets", "gpus": 1, "gpu_batch_multiplier": 256, "load": null, "finetune": null, "num_epochs": 10, "patience": 20, "lr": 0.0001, "lr_schedule": "cosinedecay", "conv_type": "gnn_lsh", "ntrain": null, "ntest": 2000, "nvalid": null, "num_workers": 8, "prefetch_factor": 100, "checkpoint_freq": 1, "comet_name": "particleflow-pt", "comet_offline": false, "comet_step_freq": 100, "dtype": "bfloat16", "val_freq": null, "model": {"trainable": "all", "learned_representation_mode": "last", "input_encoding": "split", "pt_mode": "direct-elemtype-split", "eta_mode": "linear", "sin_phi_mode": "linear", "cos_phi_mode": "linear", "energy_mode": "direct-elemtype-split", "gnn_lsh": {"conv_type": "gnn_lsh", "embedding_dim": 1024, "width": 1024, "num_convs": 3, "activation": "elu", "bin_size": 32, "max_num_bins": 200, "distance_dim": 128, "layernorm": true, "num_node_messages": 2, "ffn_dist_hidden_dim": 128, "ffn_dist_num_layers": 2}, "attention": {"conv_type": "attention", "num_convs": 3, "dropout_ff": 0.0, "dropout_conv_id_mha": 0.0, "dropout_conv_id_ff": 0.0, "dropout_conv_reg_mha": 0.0, "dropout_conv_reg_ff": 0.0, "activation": "relu", "head_dim": 32, "num_heads": 32, "attention_type": "flash", "use_pre_layernorm": true}, "mamba": {"conv_type": "mamba", "embedding_dim": 128, "width": 128, "num_convs": 2, "dropout": 0.0, "activation": "elu", "num_heads": 2, "d_state": 16, "d_conv": 4, "expand": 2}}, "lr_schedule_config": {"onecycle": {"pct_start": 0.3}}, "raytune": {"local_dir": null, "sched": null, "search_alg": null, "default_metric": "val_loss", "default_mode": "min", "asha": {"max_t": 200, "reduction_factor": 4, "brackets": 1, "grace_period": 10}, "hyperband": {"max_t": 200, "reduction_factor": 4}, "hyperopt": {"n_random_steps": 10}, "nevergrad": {"n_random_steps": 10}}, "train_dataset": {"clic": {"physical": {"batch_size": 1, "samples": {"clic_edm_qq_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ttbar_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ww_fullhad_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}}}}}, "valid_dataset": {"clic": {"physical": {"batch_size": 1, "samples": {"clic_edm_qq_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ttbar_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ww_fullhad_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}}}}}, "test_dataset": {"clic_edm_qq_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ttbar_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, "clic_edm_ww_fullhad_pf": {"version": "2.5.0", "splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}}, "enabled_test_datasets": ["clic_edm_qq_pf"]}
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/model_kwargs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:544045d26763d5a9d2fb2954cd23771043b9263e2c82abaa00210b34553bf24e
|
| 3 |
+
size 491
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/overridden_config.yaml
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
checkpoint_freq: 1
|
| 2 |
+
comet: true
|
| 3 |
+
comet_name: particleflow-pt
|
| 4 |
+
comet_offline: false
|
| 5 |
+
comet_step_freq: 100
|
| 6 |
+
conv_type: gnn_lsh
|
| 7 |
+
data_dir: /scratch/persistent/joosep/tensorflow_datasets
|
| 8 |
+
dataset: clic
|
| 9 |
+
dtype: bfloat16
|
| 10 |
+
enabled_test_datasets:
|
| 11 |
+
- clic_edm_qq_pf
|
| 12 |
+
finetune: null
|
| 13 |
+
gpu_batch_multiplier: 256
|
| 14 |
+
gpus: 1
|
| 15 |
+
load: null
|
| 16 |
+
lr: 0.0001
|
| 17 |
+
lr_schedule: cosinedecay
|
| 18 |
+
lr_schedule_config:
|
| 19 |
+
onecycle:
|
| 20 |
+
pct_start: 0.3
|
| 21 |
+
make_plots: null
|
| 22 |
+
model:
|
| 23 |
+
attention:
|
| 24 |
+
activation: relu
|
| 25 |
+
attention_type: flash
|
| 26 |
+
conv_type: attention
|
| 27 |
+
dropout_conv_id_ff: 0.0
|
| 28 |
+
dropout_conv_id_mha: 0.0
|
| 29 |
+
dropout_conv_reg_ff: 0.0
|
| 30 |
+
dropout_conv_reg_mha: 0.0
|
| 31 |
+
dropout_ff: 0.0
|
| 32 |
+
head_dim: 32
|
| 33 |
+
num_convs: 3
|
| 34 |
+
num_heads: 32
|
| 35 |
+
use_pre_layernorm: true
|
| 36 |
+
cos_phi_mode: linear
|
| 37 |
+
energy_mode: direct-elemtype-split
|
| 38 |
+
eta_mode: linear
|
| 39 |
+
gnn_lsh:
|
| 40 |
+
activation: elu
|
| 41 |
+
bin_size: 32
|
| 42 |
+
conv_type: gnn_lsh
|
| 43 |
+
distance_dim: 128
|
| 44 |
+
embedding_dim: 1024
|
| 45 |
+
ffn_dist_hidden_dim: 128
|
| 46 |
+
ffn_dist_num_layers: 2
|
| 47 |
+
layernorm: true
|
| 48 |
+
max_num_bins: 200
|
| 49 |
+
num_convs: 3
|
| 50 |
+
num_node_messages: 2
|
| 51 |
+
width: 1024
|
| 52 |
+
input_encoding: split
|
| 53 |
+
learned_representation_mode: last
|
| 54 |
+
mamba:
|
| 55 |
+
activation: elu
|
| 56 |
+
conv_type: mamba
|
| 57 |
+
d_conv: 4
|
| 58 |
+
d_state: 16
|
| 59 |
+
dropout: 0.0
|
| 60 |
+
embedding_dim: 128
|
| 61 |
+
expand: 2
|
| 62 |
+
num_convs: 2
|
| 63 |
+
num_heads: 2
|
| 64 |
+
width: 128
|
| 65 |
+
pt_mode: direct-elemtype-split
|
| 66 |
+
sin_phi_mode: linear
|
| 67 |
+
trainable: all
|
| 68 |
+
ntest: 2000
|
| 69 |
+
ntrain: null
|
| 70 |
+
num_epochs: 10
|
| 71 |
+
num_workers: 8
|
| 72 |
+
nvalid: null
|
| 73 |
+
patience: 20
|
| 74 |
+
prefetch_factor: 100
|
| 75 |
+
raytune:
|
| 76 |
+
asha:
|
| 77 |
+
brackets: 1
|
| 78 |
+
grace_period: 10
|
| 79 |
+
max_t: 200
|
| 80 |
+
reduction_factor: 4
|
| 81 |
+
default_metric: val_loss
|
| 82 |
+
default_mode: min
|
| 83 |
+
hyperband:
|
| 84 |
+
max_t: 200
|
| 85 |
+
reduction_factor: 4
|
| 86 |
+
hyperopt:
|
| 87 |
+
n_random_steps: 10
|
| 88 |
+
local_dir: null
|
| 89 |
+
nevergrad:
|
| 90 |
+
n_random_steps: 10
|
| 91 |
+
sched: null
|
| 92 |
+
search_alg: null
|
| 93 |
+
save_attention: true
|
| 94 |
+
sort_data: false
|
| 95 |
+
test: null
|
| 96 |
+
test_dataset:
|
| 97 |
+
clic_edm_qq_pf:
|
| 98 |
+
splits:
|
| 99 |
+
- 1
|
| 100 |
+
- 2
|
| 101 |
+
- 3
|
| 102 |
+
- 4
|
| 103 |
+
- 5
|
| 104 |
+
- 6
|
| 105 |
+
- 7
|
| 106 |
+
- 8
|
| 107 |
+
- 9
|
| 108 |
+
- 10
|
| 109 |
+
version: 2.5.0
|
| 110 |
+
clic_edm_ttbar_pf:
|
| 111 |
+
splits:
|
| 112 |
+
- 1
|
| 113 |
+
- 2
|
| 114 |
+
- 3
|
| 115 |
+
- 4
|
| 116 |
+
- 5
|
| 117 |
+
- 6
|
| 118 |
+
- 7
|
| 119 |
+
- 8
|
| 120 |
+
- 9
|
| 121 |
+
- 10
|
| 122 |
+
version: 2.5.0
|
| 123 |
+
clic_edm_ww_fullhad_pf:
|
| 124 |
+
splits:
|
| 125 |
+
- 1
|
| 126 |
+
- 2
|
| 127 |
+
- 3
|
| 128 |
+
- 4
|
| 129 |
+
- 5
|
| 130 |
+
- 6
|
| 131 |
+
- 7
|
| 132 |
+
- 8
|
| 133 |
+
- 9
|
| 134 |
+
- 10
|
| 135 |
+
version: 2.5.0
|
| 136 |
+
train: true
|
| 137 |
+
train_dataset:
|
| 138 |
+
clic:
|
| 139 |
+
physical:
|
| 140 |
+
batch_size: 1
|
| 141 |
+
samples:
|
| 142 |
+
clic_edm_qq_pf:
|
| 143 |
+
splits:
|
| 144 |
+
- 1
|
| 145 |
+
- 2
|
| 146 |
+
- 3
|
| 147 |
+
- 4
|
| 148 |
+
- 5
|
| 149 |
+
- 6
|
| 150 |
+
- 7
|
| 151 |
+
- 8
|
| 152 |
+
- 9
|
| 153 |
+
- 10
|
| 154 |
+
version: 2.5.0
|
| 155 |
+
clic_edm_ttbar_pf:
|
| 156 |
+
splits:
|
| 157 |
+
- 1
|
| 158 |
+
- 2
|
| 159 |
+
- 3
|
| 160 |
+
- 4
|
| 161 |
+
- 5
|
| 162 |
+
- 6
|
| 163 |
+
- 7
|
| 164 |
+
- 8
|
| 165 |
+
- 9
|
| 166 |
+
- 10
|
| 167 |
+
version: 2.5.0
|
| 168 |
+
clic_edm_ww_fullhad_pf:
|
| 169 |
+
splits:
|
| 170 |
+
- 1
|
| 171 |
+
- 2
|
| 172 |
+
- 3
|
| 173 |
+
- 4
|
| 174 |
+
- 5
|
| 175 |
+
- 6
|
| 176 |
+
- 7
|
| 177 |
+
- 8
|
| 178 |
+
- 9
|
| 179 |
+
- 10
|
| 180 |
+
version: 2.5.0
|
| 181 |
+
val_freq: null
|
| 182 |
+
valid_dataset:
|
| 183 |
+
clic:
|
| 184 |
+
physical:
|
| 185 |
+
batch_size: 1
|
| 186 |
+
samples:
|
| 187 |
+
clic_edm_qq_pf:
|
| 188 |
+
splits:
|
| 189 |
+
- 1
|
| 190 |
+
- 2
|
| 191 |
+
- 3
|
| 192 |
+
- 4
|
| 193 |
+
- 5
|
| 194 |
+
- 6
|
| 195 |
+
- 7
|
| 196 |
+
- 8
|
| 197 |
+
- 9
|
| 198 |
+
- 10
|
| 199 |
+
version: 2.5.0
|
| 200 |
+
clic_edm_ttbar_pf:
|
| 201 |
+
splits:
|
| 202 |
+
- 1
|
| 203 |
+
- 2
|
| 204 |
+
- 3
|
| 205 |
+
- 4
|
| 206 |
+
- 5
|
| 207 |
+
- 6
|
| 208 |
+
- 7
|
| 209 |
+
- 8
|
| 210 |
+
- 9
|
| 211 |
+
- 10
|
| 212 |
+
version: 2.5.0
|
| 213 |
+
clic_edm_ww_fullhad_pf:
|
| 214 |
+
splits:
|
| 215 |
+
- 1
|
| 216 |
+
- 2
|
| 217 |
+
- 3
|
| 218 |
+
- 4
|
| 219 |
+
- 5
|
| 220 |
+
- 6
|
| 221 |
+
- 7
|
| 222 |
+
- 8
|
| 223 |
+
- 9
|
| 224 |
+
- 10
|
| 225 |
+
version: 2.5.0
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4da42d7ce3c2811160b8875dd27f3e5803ad73e35975e248782ad3a4cc1687d9
|
| 3 |
+
size 3697789
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75fde31886b73991fdd1b030b8362ce82279384e4f9ca530417dc0ed40004c75
|
| 3 |
+
size 3924659
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27e2de2441b7d8537b28dec48de817446a0e0e9e3ebc3a860dbacb14d9acdd5d
|
| 3 |
+
size 3749584
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a81cdb9200535c1700c532b35ee42bf6ada7e0e62f99d938d42245eb949aa083
|
| 3 |
+
size 3892284
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cc2b7e0511ef25f6ba143d13dc7a1d0937f2860b23ff5aee556514524cc2acf
|
| 3 |
+
size 3958748
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33b63a7435d52925ae73e12b73434262b48478a85fd7c3c33408c723172aa516
|
| 3 |
+
size 3869607
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:beb2b7796dbc4e7365c3a70477e1686c063506f7982d6656491563636367bc82
|
| 3 |
+
size 3644752
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc53e9fda0a37737823dc83ad99127ee2de7d7b1772e8f4dd48fe520095bcc78
|
| 3 |
+
size 3117659
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/train/events.out.tfevents.1738649783.gpu1.local.3067131.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbe281fada73723faad162ab2aabcbeb738ef28af2f43bfac9e527b0a2f8ead5
|
| 3 |
+
size 1047551
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/runs/valid/events.out.tfevents.1738649783.gpu1.local.3067131.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bcce84bbda1cc05e167ec6fff5cf3488c1f7593c3f0939ea8c1d21442010539
|
| 3 |
+
size 966360
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train-config.yaml
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
checkpoint_freq: 1
|
| 2 |
+
comet: true
|
| 3 |
+
comet_name: particleflow-pt
|
| 4 |
+
comet_offline: false
|
| 5 |
+
comet_step_freq: 100
|
| 6 |
+
conv_type: gnn_lsh
|
| 7 |
+
data_dir: /scratch/persistent/joosep/tensorflow_datasets
|
| 8 |
+
dataset: clic
|
| 9 |
+
dtype: bfloat16
|
| 10 |
+
enabled_test_datasets:
|
| 11 |
+
- clic_edm_qq_pf
|
| 12 |
+
finetune: null
|
| 13 |
+
gpu_batch_multiplier: 256
|
| 14 |
+
gpus: 1
|
| 15 |
+
load: null
|
| 16 |
+
lr: 0.0001
|
| 17 |
+
lr_schedule: cosinedecay
|
| 18 |
+
lr_schedule_config:
|
| 19 |
+
onecycle:
|
| 20 |
+
pct_start: 0.3
|
| 21 |
+
make_plots: null
|
| 22 |
+
model:
|
| 23 |
+
attention:
|
| 24 |
+
activation: relu
|
| 25 |
+
attention_type: flash
|
| 26 |
+
conv_type: attention
|
| 27 |
+
dropout_conv_id_ff: 0.0
|
| 28 |
+
dropout_conv_id_mha: 0.0
|
| 29 |
+
dropout_conv_reg_ff: 0.0
|
| 30 |
+
dropout_conv_reg_mha: 0.0
|
| 31 |
+
dropout_ff: 0.0
|
| 32 |
+
head_dim: 32
|
| 33 |
+
num_convs: 3
|
| 34 |
+
num_heads: 32
|
| 35 |
+
use_pre_layernorm: true
|
| 36 |
+
cos_phi_mode: linear
|
| 37 |
+
energy_mode: direct-elemtype-split
|
| 38 |
+
eta_mode: linear
|
| 39 |
+
gnn_lsh:
|
| 40 |
+
activation: elu
|
| 41 |
+
bin_size: 32
|
| 42 |
+
conv_type: gnn_lsh
|
| 43 |
+
distance_dim: 128
|
| 44 |
+
embedding_dim: 1024
|
| 45 |
+
ffn_dist_hidden_dim: 128
|
| 46 |
+
ffn_dist_num_layers: 2
|
| 47 |
+
layernorm: true
|
| 48 |
+
max_num_bins: 200
|
| 49 |
+
num_convs: 3
|
| 50 |
+
num_node_messages: 2
|
| 51 |
+
width: 1024
|
| 52 |
+
input_encoding: split
|
| 53 |
+
learned_representation_mode: last
|
| 54 |
+
mamba:
|
| 55 |
+
activation: elu
|
| 56 |
+
conv_type: mamba
|
| 57 |
+
d_conv: 4
|
| 58 |
+
d_state: 16
|
| 59 |
+
dropout: 0.0
|
| 60 |
+
embedding_dim: 128
|
| 61 |
+
expand: 2
|
| 62 |
+
num_convs: 2
|
| 63 |
+
num_heads: 2
|
| 64 |
+
width: 128
|
| 65 |
+
pt_mode: direct-elemtype-split
|
| 66 |
+
sin_phi_mode: linear
|
| 67 |
+
trainable: all
|
| 68 |
+
ntest: 2000
|
| 69 |
+
ntrain: null
|
| 70 |
+
num_epochs: 10
|
| 71 |
+
num_workers: 8
|
| 72 |
+
nvalid: null
|
| 73 |
+
patience: 20
|
| 74 |
+
prefetch_factor: 100
|
| 75 |
+
raytune:
|
| 76 |
+
asha:
|
| 77 |
+
brackets: 1
|
| 78 |
+
grace_period: 10
|
| 79 |
+
max_t: 200
|
| 80 |
+
reduction_factor: 4
|
| 81 |
+
default_metric: val_loss
|
| 82 |
+
default_mode: min
|
| 83 |
+
hyperband:
|
| 84 |
+
max_t: 200
|
| 85 |
+
reduction_factor: 4
|
| 86 |
+
hyperopt:
|
| 87 |
+
n_random_steps: 10
|
| 88 |
+
local_dir: null
|
| 89 |
+
nevergrad:
|
| 90 |
+
n_random_steps: 10
|
| 91 |
+
sched: null
|
| 92 |
+
search_alg: null
|
| 93 |
+
save_attention: true
|
| 94 |
+
sort_data: false
|
| 95 |
+
test: null
|
| 96 |
+
test_dataset:
|
| 97 |
+
clic_edm_qq_pf:
|
| 98 |
+
splits:
|
| 99 |
+
- 1
|
| 100 |
+
- 2
|
| 101 |
+
- 3
|
| 102 |
+
- 4
|
| 103 |
+
- 5
|
| 104 |
+
- 6
|
| 105 |
+
- 7
|
| 106 |
+
- 8
|
| 107 |
+
- 9
|
| 108 |
+
- 10
|
| 109 |
+
version: 2.5.0
|
| 110 |
+
clic_edm_ttbar_pf:
|
| 111 |
+
splits:
|
| 112 |
+
- 1
|
| 113 |
+
- 2
|
| 114 |
+
- 3
|
| 115 |
+
- 4
|
| 116 |
+
- 5
|
| 117 |
+
- 6
|
| 118 |
+
- 7
|
| 119 |
+
- 8
|
| 120 |
+
- 9
|
| 121 |
+
- 10
|
| 122 |
+
version: 2.5.0
|
| 123 |
+
clic_edm_ww_fullhad_pf:
|
| 124 |
+
splits:
|
| 125 |
+
- 1
|
| 126 |
+
- 2
|
| 127 |
+
- 3
|
| 128 |
+
- 4
|
| 129 |
+
- 5
|
| 130 |
+
- 6
|
| 131 |
+
- 7
|
| 132 |
+
- 8
|
| 133 |
+
- 9
|
| 134 |
+
- 10
|
| 135 |
+
version: 2.5.0
|
| 136 |
+
train: true
|
| 137 |
+
train_dataset:
|
| 138 |
+
clic:
|
| 139 |
+
physical:
|
| 140 |
+
batch_size: 1
|
| 141 |
+
samples:
|
| 142 |
+
clic_edm_qq_pf:
|
| 143 |
+
splits:
|
| 144 |
+
- 1
|
| 145 |
+
- 2
|
| 146 |
+
- 3
|
| 147 |
+
- 4
|
| 148 |
+
- 5
|
| 149 |
+
- 6
|
| 150 |
+
- 7
|
| 151 |
+
- 8
|
| 152 |
+
- 9
|
| 153 |
+
- 10
|
| 154 |
+
version: 2.5.0
|
| 155 |
+
clic_edm_ttbar_pf:
|
| 156 |
+
splits:
|
| 157 |
+
- 1
|
| 158 |
+
- 2
|
| 159 |
+
- 3
|
| 160 |
+
- 4
|
| 161 |
+
- 5
|
| 162 |
+
- 6
|
| 163 |
+
- 7
|
| 164 |
+
- 8
|
| 165 |
+
- 9
|
| 166 |
+
- 10
|
| 167 |
+
version: 2.5.0
|
| 168 |
+
clic_edm_ww_fullhad_pf:
|
| 169 |
+
splits:
|
| 170 |
+
- 1
|
| 171 |
+
- 2
|
| 172 |
+
- 3
|
| 173 |
+
- 4
|
| 174 |
+
- 5
|
| 175 |
+
- 6
|
| 176 |
+
- 7
|
| 177 |
+
- 8
|
| 178 |
+
- 9
|
| 179 |
+
- 10
|
| 180 |
+
version: 2.5.0
|
| 181 |
+
val_freq: null
|
| 182 |
+
valid_dataset:
|
| 183 |
+
clic:
|
| 184 |
+
physical:
|
| 185 |
+
batch_size: 1
|
| 186 |
+
samples:
|
| 187 |
+
clic_edm_qq_pf:
|
| 188 |
+
splits:
|
| 189 |
+
- 1
|
| 190 |
+
- 2
|
| 191 |
+
- 3
|
| 192 |
+
- 4
|
| 193 |
+
- 5
|
| 194 |
+
- 6
|
| 195 |
+
- 7
|
| 196 |
+
- 8
|
| 197 |
+
- 9
|
| 198 |
+
- 10
|
| 199 |
+
version: 2.5.0
|
| 200 |
+
clic_edm_ttbar_pf:
|
| 201 |
+
splits:
|
| 202 |
+
- 1
|
| 203 |
+
- 2
|
| 204 |
+
- 3
|
| 205 |
+
- 4
|
| 206 |
+
- 5
|
| 207 |
+
- 6
|
| 208 |
+
- 7
|
| 209 |
+
- 8
|
| 210 |
+
- 9
|
| 211 |
+
- 10
|
| 212 |
+
version: 2.5.0
|
| 213 |
+
clic_edm_ww_fullhad_pf:
|
| 214 |
+
splits:
|
| 215 |
+
- 1
|
| 216 |
+
- 2
|
| 217 |
+
- 3
|
| 218 |
+
- 4
|
| 219 |
+
- 5
|
| 220 |
+
- 6
|
| 221 |
+
- 7
|
| 222 |
+
- 8
|
| 223 |
+
- 9
|
| 224 |
+
- 10
|
| 225 |
+
version: 2.5.0
|
clic/clusters/v2.3.0/pyg-clic_20250204_081614_352844/train.log
ADDED
|
@@ -0,0 +1,811 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2025-02-04 08:16:15,870] INFO: [0;35mWill use single-gpu: NVIDIA A100 80GB PCIe[0m
|
| 2 |
+
[2025-02-04 08:16:15,870] INFO: configured dtype=torch.bfloat16 for autocast
|
| 3 |
+
[2025-02-04 08:16:15,870] INFO: configured dtype=torch.bfloat16 for autocast
|
| 4 |
+
[2025-02-04 08:16:16,478] INFO: MLPF(
|
| 5 |
+
(nn0_id): ModuleList(
|
| 6 |
+
(0-1): 2 x Sequential(
|
| 7 |
+
(0): Linear(in_features=17, out_features=1024, bias=True)
|
| 8 |
+
(1): ELU(alpha=1.0)
|
| 9 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 10 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 11 |
+
(4): Linear(in_features=1024, out_features=1024, bias=True)
|
| 12 |
+
)
|
| 13 |
+
)
|
| 14 |
+
(nn0_reg): ModuleList(
|
| 15 |
+
(0-1): 2 x Sequential(
|
| 16 |
+
(0): Linear(in_features=17, out_features=1024, bias=True)
|
| 17 |
+
(1): ELU(alpha=1.0)
|
| 18 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 19 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 20 |
+
(4): Linear(in_features=1024, out_features=1024, bias=True)
|
| 21 |
+
)
|
| 22 |
+
)
|
| 23 |
+
(conv_id): ModuleList(
|
| 24 |
+
(0-2): 3 x CombinedGraphLayer(
|
| 25 |
+
(layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
|
| 26 |
+
(ffn_dist): Sequential(
|
| 27 |
+
(0): Linear(in_features=1024, out_features=128, bias=True)
|
| 28 |
+
(1): ELU(alpha=1.0)
|
| 29 |
+
(2): Linear(in_features=128, out_features=128, bias=True)
|
| 30 |
+
(3): ELU(alpha=1.0)
|
| 31 |
+
(4): Linear(in_features=128, out_features=128, bias=True)
|
| 32 |
+
)
|
| 33 |
+
(message_building_layer): MessageBuildingLayerLSH(
|
| 34 |
+
(kernel): NodePairGaussianKernel()
|
| 35 |
+
)
|
| 36 |
+
(message_passing_layers): ModuleList(
|
| 37 |
+
(0-1): 2 x GHConvDense()
|
| 38 |
+
)
|
| 39 |
+
)
|
| 40 |
+
)
|
| 41 |
+
(conv_reg): ModuleList(
|
| 42 |
+
(0-2): 3 x CombinedGraphLayer(
|
| 43 |
+
(layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
|
| 44 |
+
(ffn_dist): Sequential(
|
| 45 |
+
(0): Linear(in_features=1024, out_features=128, bias=True)
|
| 46 |
+
(1): ELU(alpha=1.0)
|
| 47 |
+
(2): Linear(in_features=128, out_features=128, bias=True)
|
| 48 |
+
(3): ELU(alpha=1.0)
|
| 49 |
+
(4): Linear(in_features=128, out_features=128, bias=True)
|
| 50 |
+
)
|
| 51 |
+
(message_building_layer): MessageBuildingLayerLSH(
|
| 52 |
+
(kernel): NodePairGaussianKernel()
|
| 53 |
+
)
|
| 54 |
+
(message_passing_layers): ModuleList(
|
| 55 |
+
(0-1): 2 x GHConvDense()
|
| 56 |
+
)
|
| 57 |
+
)
|
| 58 |
+
)
|
| 59 |
+
(nn_binary_particle): Sequential(
|
| 60 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 61 |
+
(1): ELU(alpha=1.0)
|
| 62 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 63 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 64 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 65 |
+
)
|
| 66 |
+
(nn_pid): Sequential(
|
| 67 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 68 |
+
(1): ELU(alpha=1.0)
|
| 69 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 70 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 71 |
+
(4): Linear(in_features=1024, out_features=6, bias=True)
|
| 72 |
+
)
|
| 73 |
+
(nn_pu): Sequential(
|
| 74 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 75 |
+
(1): ELU(alpha=1.0)
|
| 76 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 77 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 78 |
+
(4): Linear(in_features=1024, out_features=1, bias=True)
|
| 79 |
+
)
|
| 80 |
+
(nn_pt): RegressionOutput(
|
| 81 |
+
(nn): ModuleList(
|
| 82 |
+
(0-1): 2 x Sequential(
|
| 83 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 84 |
+
(1): ELU(alpha=1.0)
|
| 85 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 86 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 87 |
+
(4): Linear(in_features=1024, out_features=1, bias=True)
|
| 88 |
+
)
|
| 89 |
+
)
|
| 90 |
+
)
|
| 91 |
+
(nn_eta): RegressionOutput(
|
| 92 |
+
(nn): Sequential(
|
| 93 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 94 |
+
(1): ELU(alpha=1.0)
|
| 95 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 96 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 97 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 98 |
+
)
|
| 99 |
+
)
|
| 100 |
+
(nn_sin_phi): RegressionOutput(
|
| 101 |
+
(nn): Sequential(
|
| 102 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 103 |
+
(1): ELU(alpha=1.0)
|
| 104 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 105 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 106 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 107 |
+
)
|
| 108 |
+
)
|
| 109 |
+
(nn_cos_phi): RegressionOutput(
|
| 110 |
+
(nn): Sequential(
|
| 111 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 112 |
+
(1): ELU(alpha=1.0)
|
| 113 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 114 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 115 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 116 |
+
)
|
| 117 |
+
)
|
| 118 |
+
(nn_energy): RegressionOutput(
|
| 119 |
+
(nn): ModuleList(
|
| 120 |
+
(0-1): 2 x Sequential(
|
| 121 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 122 |
+
(1): ELU(alpha=1.0)
|
| 123 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 124 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 125 |
+
(4): Linear(in_features=1024, out_features=1, bias=True)
|
| 126 |
+
)
|
| 127 |
+
)
|
| 128 |
+
)
|
| 129 |
+
)
|
| 130 |
+
[2025-02-04 08:16:16,478] INFO: MLPF(
|
| 131 |
+
(nn0_id): ModuleList(
|
| 132 |
+
(0-1): 2 x Sequential(
|
| 133 |
+
(0): Linear(in_features=17, out_features=1024, bias=True)
|
| 134 |
+
(1): ELU(alpha=1.0)
|
| 135 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 136 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 137 |
+
(4): Linear(in_features=1024, out_features=1024, bias=True)
|
| 138 |
+
)
|
| 139 |
+
)
|
| 140 |
+
(nn0_reg): ModuleList(
|
| 141 |
+
(0-1): 2 x Sequential(
|
| 142 |
+
(0): Linear(in_features=17, out_features=1024, bias=True)
|
| 143 |
+
(1): ELU(alpha=1.0)
|
| 144 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 145 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 146 |
+
(4): Linear(in_features=1024, out_features=1024, bias=True)
|
| 147 |
+
)
|
| 148 |
+
)
|
| 149 |
+
(conv_id): ModuleList(
|
| 150 |
+
(0-2): 3 x CombinedGraphLayer(
|
| 151 |
+
(layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
|
| 152 |
+
(ffn_dist): Sequential(
|
| 153 |
+
(0): Linear(in_features=1024, out_features=128, bias=True)
|
| 154 |
+
(1): ELU(alpha=1.0)
|
| 155 |
+
(2): Linear(in_features=128, out_features=128, bias=True)
|
| 156 |
+
(3): ELU(alpha=1.0)
|
| 157 |
+
(4): Linear(in_features=128, out_features=128, bias=True)
|
| 158 |
+
)
|
| 159 |
+
(message_building_layer): MessageBuildingLayerLSH(
|
| 160 |
+
(kernel): NodePairGaussianKernel()
|
| 161 |
+
)
|
| 162 |
+
(message_passing_layers): ModuleList(
|
| 163 |
+
(0-1): 2 x GHConvDense()
|
| 164 |
+
)
|
| 165 |
+
)
|
| 166 |
+
)
|
| 167 |
+
(conv_reg): ModuleList(
|
| 168 |
+
(0-2): 3 x CombinedGraphLayer(
|
| 169 |
+
(layernorm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
|
| 170 |
+
(ffn_dist): Sequential(
|
| 171 |
+
(0): Linear(in_features=1024, out_features=128, bias=True)
|
| 172 |
+
(1): ELU(alpha=1.0)
|
| 173 |
+
(2): Linear(in_features=128, out_features=128, bias=True)
|
| 174 |
+
(3): ELU(alpha=1.0)
|
| 175 |
+
(4): Linear(in_features=128, out_features=128, bias=True)
|
| 176 |
+
)
|
| 177 |
+
(message_building_layer): MessageBuildingLayerLSH(
|
| 178 |
+
(kernel): NodePairGaussianKernel()
|
| 179 |
+
)
|
| 180 |
+
(message_passing_layers): ModuleList(
|
| 181 |
+
(0-1): 2 x GHConvDense()
|
| 182 |
+
)
|
| 183 |
+
)
|
| 184 |
+
)
|
| 185 |
+
(nn_binary_particle): Sequential(
|
| 186 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 187 |
+
(1): ELU(alpha=1.0)
|
| 188 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 189 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 190 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 191 |
+
)
|
| 192 |
+
(nn_pid): Sequential(
|
| 193 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 194 |
+
(1): ELU(alpha=1.0)
|
| 195 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 196 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 197 |
+
(4): Linear(in_features=1024, out_features=6, bias=True)
|
| 198 |
+
)
|
| 199 |
+
(nn_pu): Sequential(
|
| 200 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 201 |
+
(1): ELU(alpha=1.0)
|
| 202 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 203 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 204 |
+
(4): Linear(in_features=1024, out_features=1, bias=True)
|
| 205 |
+
)
|
| 206 |
+
(nn_pt): RegressionOutput(
|
| 207 |
+
(nn): ModuleList(
|
| 208 |
+
(0-1): 2 x Sequential(
|
| 209 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 210 |
+
(1): ELU(alpha=1.0)
|
| 211 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 212 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 213 |
+
(4): Linear(in_features=1024, out_features=1, bias=True)
|
| 214 |
+
)
|
| 215 |
+
)
|
| 216 |
+
)
|
| 217 |
+
(nn_eta): RegressionOutput(
|
| 218 |
+
(nn): Sequential(
|
| 219 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 220 |
+
(1): ELU(alpha=1.0)
|
| 221 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 222 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 223 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 224 |
+
)
|
| 225 |
+
)
|
| 226 |
+
(nn_sin_phi): RegressionOutput(
|
| 227 |
+
(nn): Sequential(
|
| 228 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 229 |
+
(1): ELU(alpha=1.0)
|
| 230 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 231 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 232 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 233 |
+
)
|
| 234 |
+
)
|
| 235 |
+
(nn_cos_phi): RegressionOutput(
|
| 236 |
+
(nn): Sequential(
|
| 237 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 238 |
+
(1): ELU(alpha=1.0)
|
| 239 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 240 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 241 |
+
(4): Linear(in_features=1024, out_features=2, bias=True)
|
| 242 |
+
)
|
| 243 |
+
)
|
| 244 |
+
(nn_energy): RegressionOutput(
|
| 245 |
+
(nn): ModuleList(
|
| 246 |
+
(0-1): 2 x Sequential(
|
| 247 |
+
(0): Linear(in_features=1024, out_features=1024, bias=True)
|
| 248 |
+
(1): ELU(alpha=1.0)
|
| 249 |
+
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
|
| 250 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 251 |
+
(4): Linear(in_features=1024, out_features=1, bias=True)
|
| 252 |
+
)
|
| 253 |
+
)
|
| 254 |
+
)
|
| 255 |
+
)
|
| 256 |
+
[2025-02-04 08:16:16,480] INFO: Trainable parameters: 53574931
|
| 257 |
+
[2025-02-04 08:16:16,480] INFO: Trainable parameters: 53574931
|
| 258 |
+
[2025-02-04 08:16:16,480] INFO: Non-trainable parameters: 76800
|
| 259 |
+
[2025-02-04 08:16:16,480] INFO: Non-trainable parameters: 76800
|
| 260 |
+
[2025-02-04 08:16:16,480] INFO: Total parameters: 53651731
|
| 261 |
+
[2025-02-04 08:16:16,480] INFO: Total parameters: 53651731
|
| 262 |
+
[2025-02-04 08:16:16,482] INFO: Modules Trainable parameters Non-trainable parameters
|
| 263 |
+
nn0_id.0.0.weight 17408 0
|
| 264 |
+
nn0_id.0.0.bias 1024 0
|
| 265 |
+
nn0_id.0.2.weight 1024 0
|
| 266 |
+
nn0_id.0.2.bias 1024 0
|
| 267 |
+
nn0_id.0.4.weight 1048576 0
|
| 268 |
+
nn0_id.0.4.bias 1024 0
|
| 269 |
+
nn0_id.1.0.weight 17408 0
|
| 270 |
+
nn0_id.1.0.bias 1024 0
|
| 271 |
+
nn0_id.1.2.weight 1024 0
|
| 272 |
+
nn0_id.1.2.bias 1024 0
|
| 273 |
+
nn0_id.1.4.weight 1048576 0
|
| 274 |
+
nn0_id.1.4.bias 1024 0
|
| 275 |
+
nn0_reg.0.0.weight 17408 0
|
| 276 |
+
nn0_reg.0.0.bias 1024 0
|
| 277 |
+
nn0_reg.0.2.weight 1024 0
|
| 278 |
+
nn0_reg.0.2.bias 1024 0
|
| 279 |
+
nn0_reg.0.4.weight 1048576 0
|
| 280 |
+
nn0_reg.0.4.bias 1024 0
|
| 281 |
+
nn0_reg.1.0.weight 17408 0
|
| 282 |
+
nn0_reg.1.0.bias 1024 0
|
| 283 |
+
nn0_reg.1.2.weight 1024 0
|
| 284 |
+
nn0_reg.1.2.bias 1024 0
|
| 285 |
+
nn0_reg.1.4.weight 1048576 0
|
| 286 |
+
nn0_reg.1.4.bias 1024 0
|
| 287 |
+
conv_id.0.layernorm1.weight 1024 0
|
| 288 |
+
conv_id.0.layernorm1.bias 1024 0
|
| 289 |
+
conv_id.0.ffn_dist.0.weight 131072 0
|
| 290 |
+
conv_id.0.ffn_dist.0.bias 128 0
|
| 291 |
+
conv_id.0.ffn_dist.2.weight 16384 0
|
| 292 |
+
conv_id.0.ffn_dist.2.bias 128 0
|
| 293 |
+
conv_id.0.ffn_dist.4.weight 16384 0
|
| 294 |
+
conv_id.0.ffn_dist.4.bias 128 0
|
| 295 |
+
conv_id.0.message_building_layer.codebook_random_rotations 0 12800
|
| 296 |
+
conv_id.0.message_passing_layers.0.W_t 1048576 0
|
| 297 |
+
conv_id.0.message_passing_layers.0.b_t 1024 0
|
| 298 |
+
conv_id.0.message_passing_layers.0.W_h 1048576 0
|
| 299 |
+
conv_id.0.message_passing_layers.0.theta 1048576 0
|
| 300 |
+
conv_id.0.message_passing_layers.1.W_t 1048576 0
|
| 301 |
+
conv_id.0.message_passing_layers.1.b_t 1024 0
|
| 302 |
+
conv_id.0.message_passing_layers.1.W_h 1048576 0
|
| 303 |
+
conv_id.0.message_passing_layers.1.theta 1048576 0
|
| 304 |
+
conv_id.1.layernorm1.weight 1024 0
|
| 305 |
+
conv_id.1.layernorm1.bias 1024 0
|
| 306 |
+
conv_id.1.ffn_dist.0.weight 131072 0
|
| 307 |
+
conv_id.1.ffn_dist.0.bias 128 0
|
| 308 |
+
conv_id.1.ffn_dist.2.weight 16384 0
|
| 309 |
+
conv_id.1.ffn_dist.2.bias 128 0
|
| 310 |
+
conv_id.1.ffn_dist.4.weight 16384 0
|
| 311 |
+
conv_id.1.ffn_dist.4.bias 128 0
|
| 312 |
+
conv_id.1.message_building_layer.codebook_random_rotations 0 12800
|
| 313 |
+
conv_id.1.message_passing_layers.0.W_t 1048576 0
|
| 314 |
+
conv_id.1.message_passing_layers.0.b_t 1024 0
|
| 315 |
+
conv_id.1.message_passing_layers.0.W_h 1048576 0
|
| 316 |
+
conv_id.1.message_passing_layers.0.theta 1048576 0
|
| 317 |
+
conv_id.1.message_passing_layers.1.W_t 1048576 0
|
| 318 |
+
conv_id.1.message_passing_layers.1.b_t 1024 0
|
| 319 |
+
conv_id.1.message_passing_layers.1.W_h 1048576 0
|
| 320 |
+
conv_id.1.message_passing_layers.1.theta 1048576 0
|
| 321 |
+
conv_id.2.layernorm1.weight 1024 0
|
| 322 |
+
conv_id.2.layernorm1.bias 1024 0
|
| 323 |
+
conv_id.2.ffn_dist.0.weight 131072 0
|
| 324 |
+
conv_id.2.ffn_dist.0.bias 128 0
|
| 325 |
+
conv_id.2.ffn_dist.2.weight 16384 0
|
| 326 |
+
conv_id.2.ffn_dist.2.bias 128 0
|
| 327 |
+
conv_id.2.ffn_dist.4.weight 16384 0
|
| 328 |
+
conv_id.2.ffn_dist.4.bias 128 0
|
| 329 |
+
conv_id.2.message_building_layer.codebook_random_rotations 0 12800
|
| 330 |
+
conv_id.2.message_passing_layers.0.W_t 1048576 0
|
| 331 |
+
conv_id.2.message_passing_layers.0.b_t 1024 0
|
| 332 |
+
conv_id.2.message_passing_layers.0.W_h 1048576 0
|
| 333 |
+
conv_id.2.message_passing_layers.0.theta 1048576 0
|
| 334 |
+
conv_id.2.message_passing_layers.1.W_t 1048576 0
|
| 335 |
+
conv_id.2.message_passing_layers.1.b_t 1024 0
|
| 336 |
+
conv_id.2.message_passing_layers.1.W_h 1048576 0
|
| 337 |
+
conv_id.2.message_passing_layers.1.theta 1048576 0
|
| 338 |
+
conv_reg.0.layernorm1.weight 1024 0
|
| 339 |
+
conv_reg.0.layernorm1.bias 1024 0
|
| 340 |
+
conv_reg.0.ffn_dist.0.weight 131072 0
|
| 341 |
+
conv_reg.0.ffn_dist.0.bias 128 0
|
| 342 |
+
conv_reg.0.ffn_dist.2.weight 16384 0
|
| 343 |
+
conv_reg.0.ffn_dist.2.bias 128 0
|
| 344 |
+
conv_reg.0.ffn_dist.4.weight 16384 0
|
| 345 |
+
conv_reg.0.ffn_dist.4.bias 128 0
|
| 346 |
+
conv_reg.0.message_building_layer.codebook_random_rotations 0 12800
|
| 347 |
+
conv_reg.0.message_passing_layers.0.W_t 1048576 0
|
| 348 |
+
conv_reg.0.message_passing_layers.0.b_t 1024 0
|
| 349 |
+
conv_reg.0.message_passing_layers.0.W_h 1048576 0
|
| 350 |
+
conv_reg.0.message_passing_layers.0.theta 1048576 0
|
| 351 |
+
conv_reg.0.message_passing_layers.1.W_t 1048576 0
|
| 352 |
+
conv_reg.0.message_passing_layers.1.b_t 1024 0
|
| 353 |
+
conv_reg.0.message_passing_layers.1.W_h 1048576 0
|
| 354 |
+
conv_reg.0.message_passing_layers.1.theta 1048576 0
|
| 355 |
+
conv_reg.1.layernorm1.weight 1024 0
|
| 356 |
+
conv_reg.1.layernorm1.bias 1024 0
|
| 357 |
+
conv_reg.1.ffn_dist.0.weight 131072 0
|
| 358 |
+
conv_reg.1.ffn_dist.0.bias 128 0
|
| 359 |
+
conv_reg.1.ffn_dist.2.weight 16384 0
|
| 360 |
+
conv_reg.1.ffn_dist.2.bias 128 0
|
| 361 |
+
conv_reg.1.ffn_dist.4.weight 16384 0
|
| 362 |
+
conv_reg.1.ffn_dist.4.bias 128 0
|
| 363 |
+
conv_reg.1.message_building_layer.codebook_random_rotations 0 12800
|
| 364 |
+
conv_reg.1.message_passing_layers.0.W_t 1048576 0
|
| 365 |
+
conv_reg.1.message_passing_layers.0.b_t 1024 0
|
| 366 |
+
conv_reg.1.message_passing_layers.0.W_h 1048576 0
|
| 367 |
+
conv_reg.1.message_passing_layers.0.theta 1048576 0
|
| 368 |
+
conv_reg.1.message_passing_layers.1.W_t 1048576 0
|
| 369 |
+
conv_reg.1.message_passing_layers.1.b_t 1024 0
|
| 370 |
+
conv_reg.1.message_passing_layers.1.W_h 1048576 0
|
| 371 |
+
conv_reg.1.message_passing_layers.1.theta 1048576 0
|
| 372 |
+
conv_reg.2.layernorm1.weight 1024 0
|
| 373 |
+
conv_reg.2.layernorm1.bias 1024 0
|
| 374 |
+
conv_reg.2.ffn_dist.0.weight 131072 0
|
| 375 |
+
conv_reg.2.ffn_dist.0.bias 128 0
|
| 376 |
+
conv_reg.2.ffn_dist.2.weight 16384 0
|
| 377 |
+
conv_reg.2.ffn_dist.2.bias 128 0
|
| 378 |
+
conv_reg.2.ffn_dist.4.weight 16384 0
|
| 379 |
+
conv_reg.2.ffn_dist.4.bias 128 0
|
| 380 |
+
conv_reg.2.message_building_layer.codebook_random_rotations 0 12800
|
| 381 |
+
conv_reg.2.message_passing_layers.0.W_t 1048576 0
|
| 382 |
+
conv_reg.2.message_passing_layers.0.b_t 1024 0
|
| 383 |
+
conv_reg.2.message_passing_layers.0.W_h 1048576 0
|
| 384 |
+
conv_reg.2.message_passing_layers.0.theta 1048576 0
|
| 385 |
+
conv_reg.2.message_passing_layers.1.W_t 1048576 0
|
| 386 |
+
conv_reg.2.message_passing_layers.1.b_t 1024 0
|
| 387 |
+
conv_reg.2.message_passing_layers.1.W_h 1048576 0
|
| 388 |
+
conv_reg.2.message_passing_layers.1.theta 1048576 0
|
| 389 |
+
nn_binary_particle.0.weight 1048576 0
|
| 390 |
+
nn_binary_particle.0.bias 1024 0
|
| 391 |
+
nn_binary_particle.2.weight 1024 0
|
| 392 |
+
nn_binary_particle.2.bias 1024 0
|
| 393 |
+
nn_binary_particle.4.weight 2048 0
|
| 394 |
+
nn_binary_particle.4.bias 2 0
|
| 395 |
+
nn_pid.0.weight 1048576 0
|
| 396 |
+
nn_pid.0.bias 1024 0
|
| 397 |
+
nn_pid.2.weight 1024 0
|
| 398 |
+
nn_pid.2.bias 1024 0
|
| 399 |
+
nn_pid.4.weight 6144 0
|
| 400 |
+
nn_pid.4.bias 6 0
|
| 401 |
+
nn_pu.0.weight 1048576 0
|
| 402 |
+
nn_pu.0.bias 1024 0
|
| 403 |
+
nn_pu.2.weight 1024 0
|
| 404 |
+
nn_pu.2.bias 1024 0
|
| 405 |
+
nn_pu.4.weight 1024 0
|
| 406 |
+
nn_pu.4.bias 1 0
|
| 407 |
+
nn_pt.nn.0.0.weight 1048576 0
|
| 408 |
+
nn_pt.nn.0.0.bias 1024 0
|
| 409 |
+
nn_pt.nn.0.2.weight 1024 0
|
| 410 |
+
nn_pt.nn.0.2.bias 1024 0
|
| 411 |
+
nn_pt.nn.0.4.weight 1024 0
|
| 412 |
+
nn_pt.nn.0.4.bias 1 0
|
| 413 |
+
nn_pt.nn.1.0.weight 1048576 0
|
| 414 |
+
nn_pt.nn.1.0.bias 1024 0
|
| 415 |
+
nn_pt.nn.1.2.weight 1024 0
|
| 416 |
+
nn_pt.nn.1.2.bias 1024 0
|
| 417 |
+
nn_pt.nn.1.4.weight 1024 0
|
| 418 |
+
nn_pt.nn.1.4.bias 1 0
|
| 419 |
+
nn_eta.nn.0.weight 1048576 0
|
| 420 |
+
nn_eta.nn.0.bias 1024 0
|
| 421 |
+
nn_eta.nn.2.weight 1024 0
|
| 422 |
+
nn_eta.nn.2.bias 1024 0
|
| 423 |
+
nn_eta.nn.4.weight 2048 0
|
| 424 |
+
nn_eta.nn.4.bias 2 0
|
| 425 |
+
nn_sin_phi.nn.0.weight 1048576 0
|
| 426 |
+
nn_sin_phi.nn.0.bias 1024 0
|
| 427 |
+
nn_sin_phi.nn.2.weight 1024 0
|
| 428 |
+
nn_sin_phi.nn.2.bias 1024 0
|
| 429 |
+
nn_sin_phi.nn.4.weight 2048 0
|
| 430 |
+
nn_sin_phi.nn.4.bias 2 0
|
| 431 |
+
nn_cos_phi.nn.0.weight 1048576 0
|
| 432 |
+
nn_cos_phi.nn.0.bias 1024 0
|
| 433 |
+
nn_cos_phi.nn.2.weight 1024 0
|
| 434 |
+
nn_cos_phi.nn.2.bias 1024 0
|
| 435 |
+
nn_cos_phi.nn.4.weight 2048 0
|
| 436 |
+
nn_cos_phi.nn.4.bias 2 0
|
| 437 |
+
nn_energy.nn.0.0.weight 1048576 0
|
| 438 |
+
nn_energy.nn.0.0.bias 1024 0
|
| 439 |
+
nn_energy.nn.0.2.weight 1024 0
|
| 440 |
+
nn_energy.nn.0.2.bias 1024 0
|
| 441 |
+
nn_energy.nn.0.4.weight 1024 0
|
| 442 |
+
nn_energy.nn.0.4.bias 1 0
|
| 443 |
+
nn_energy.nn.1.0.weight 1048576 0
|
| 444 |
+
nn_energy.nn.1.0.bias 1024 0
|
| 445 |
+
nn_energy.nn.1.2.weight 1024 0
|
| 446 |
+
nn_energy.nn.1.2.bias 1024 0
|
| 447 |
+
nn_energy.nn.1.4.weight 1024 0
|
| 448 |
+
nn_energy.nn.1.4.bias 1 0
|
| 449 |
+
[2025-02-04 08:16:16,482] INFO: Modules Trainable parameters Non-trainable parameters
|
| 450 |
+
nn0_id.0.0.weight 17408 0
|
| 451 |
+
nn0_id.0.0.bias 1024 0
|
| 452 |
+
nn0_id.0.2.weight 1024 0
|
| 453 |
+
nn0_id.0.2.bias 1024 0
|
| 454 |
+
nn0_id.0.4.weight 1048576 0
|
| 455 |
+
nn0_id.0.4.bias 1024 0
|
| 456 |
+
nn0_id.1.0.weight 17408 0
|
| 457 |
+
nn0_id.1.0.bias 1024 0
|
| 458 |
+
nn0_id.1.2.weight 1024 0
|
| 459 |
+
nn0_id.1.2.bias 1024 0
|
| 460 |
+
nn0_id.1.4.weight 1048576 0
|
| 461 |
+
nn0_id.1.4.bias 1024 0
|
| 462 |
+
nn0_reg.0.0.weight 17408 0
|
| 463 |
+
nn0_reg.0.0.bias 1024 0
|
| 464 |
+
nn0_reg.0.2.weight 1024 0
|
| 465 |
+
nn0_reg.0.2.bias 1024 0
|
| 466 |
+
nn0_reg.0.4.weight 1048576 0
|
| 467 |
+
nn0_reg.0.4.bias 1024 0
|
| 468 |
+
nn0_reg.1.0.weight 17408 0
|
| 469 |
+
nn0_reg.1.0.bias 1024 0
|
| 470 |
+
nn0_reg.1.2.weight 1024 0
|
| 471 |
+
nn0_reg.1.2.bias 1024 0
|
| 472 |
+
nn0_reg.1.4.weight 1048576 0
|
| 473 |
+
nn0_reg.1.4.bias 1024 0
|
| 474 |
+
conv_id.0.layernorm1.weight 1024 0
|
| 475 |
+
conv_id.0.layernorm1.bias 1024 0
|
| 476 |
+
conv_id.0.ffn_dist.0.weight 131072 0
|
| 477 |
+
conv_id.0.ffn_dist.0.bias 128 0
|
| 478 |
+
conv_id.0.ffn_dist.2.weight 16384 0
|
| 479 |
+
conv_id.0.ffn_dist.2.bias 128 0
|
| 480 |
+
conv_id.0.ffn_dist.4.weight 16384 0
|
| 481 |
+
conv_id.0.ffn_dist.4.bias 128 0
|
| 482 |
+
conv_id.0.message_building_layer.codebook_random_rotations 0 12800
|
| 483 |
+
conv_id.0.message_passing_layers.0.W_t 1048576 0
|
| 484 |
+
conv_id.0.message_passing_layers.0.b_t 1024 0
|
| 485 |
+
conv_id.0.message_passing_layers.0.W_h 1048576 0
|
| 486 |
+
conv_id.0.message_passing_layers.0.theta 1048576 0
|
| 487 |
+
conv_id.0.message_passing_layers.1.W_t 1048576 0
|
| 488 |
+
conv_id.0.message_passing_layers.1.b_t 1024 0
|
| 489 |
+
conv_id.0.message_passing_layers.1.W_h 1048576 0
|
| 490 |
+
conv_id.0.message_passing_layers.1.theta 1048576 0
|
| 491 |
+
conv_id.1.layernorm1.weight 1024 0
|
| 492 |
+
conv_id.1.layernorm1.bias 1024 0
|
| 493 |
+
conv_id.1.ffn_dist.0.weight 131072 0
|
| 494 |
+
conv_id.1.ffn_dist.0.bias 128 0
|
| 495 |
+
conv_id.1.ffn_dist.2.weight 16384 0
|
| 496 |
+
conv_id.1.ffn_dist.2.bias 128 0
|
| 497 |
+
conv_id.1.ffn_dist.4.weight 16384 0
|
| 498 |
+
conv_id.1.ffn_dist.4.bias 128 0
|
| 499 |
+
conv_id.1.message_building_layer.codebook_random_rotations 0 12800
|
| 500 |
+
conv_id.1.message_passing_layers.0.W_t 1048576 0
|
| 501 |
+
conv_id.1.message_passing_layers.0.b_t 1024 0
|
| 502 |
+
conv_id.1.message_passing_layers.0.W_h 1048576 0
|
| 503 |
+
conv_id.1.message_passing_layers.0.theta 1048576 0
|
| 504 |
+
conv_id.1.message_passing_layers.1.W_t 1048576 0
|
| 505 |
+
conv_id.1.message_passing_layers.1.b_t 1024 0
|
| 506 |
+
conv_id.1.message_passing_layers.1.W_h 1048576 0
|
| 507 |
+
conv_id.1.message_passing_layers.1.theta 1048576 0
|
| 508 |
+
conv_id.2.layernorm1.weight 1024 0
|
| 509 |
+
conv_id.2.layernorm1.bias 1024 0
|
| 510 |
+
conv_id.2.ffn_dist.0.weight 131072 0
|
| 511 |
+
conv_id.2.ffn_dist.0.bias 128 0
|
| 512 |
+
conv_id.2.ffn_dist.2.weight 16384 0
|
| 513 |
+
conv_id.2.ffn_dist.2.bias 128 0
|
| 514 |
+
conv_id.2.ffn_dist.4.weight 16384 0
|
| 515 |
+
conv_id.2.ffn_dist.4.bias 128 0
|
| 516 |
+
conv_id.2.message_building_layer.codebook_random_rotations 0 12800
|
| 517 |
+
conv_id.2.message_passing_layers.0.W_t 1048576 0
|
| 518 |
+
conv_id.2.message_passing_layers.0.b_t 1024 0
|
| 519 |
+
conv_id.2.message_passing_layers.0.W_h 1048576 0
|
| 520 |
+
conv_id.2.message_passing_layers.0.theta 1048576 0
|
| 521 |
+
conv_id.2.message_passing_layers.1.W_t 1048576 0
|
| 522 |
+
conv_id.2.message_passing_layers.1.b_t 1024 0
|
| 523 |
+
conv_id.2.message_passing_layers.1.W_h 1048576 0
|
| 524 |
+
conv_id.2.message_passing_layers.1.theta 1048576 0
|
| 525 |
+
conv_reg.0.layernorm1.weight 1024 0
|
| 526 |
+
conv_reg.0.layernorm1.bias 1024 0
|
| 527 |
+
conv_reg.0.ffn_dist.0.weight 131072 0
|
| 528 |
+
conv_reg.0.ffn_dist.0.bias 128 0
|
| 529 |
+
conv_reg.0.ffn_dist.2.weight 16384 0
|
| 530 |
+
conv_reg.0.ffn_dist.2.bias 128 0
|
| 531 |
+
conv_reg.0.ffn_dist.4.weight 16384 0
|
| 532 |
+
conv_reg.0.ffn_dist.4.bias 128 0
|
| 533 |
+
conv_reg.0.message_building_layer.codebook_random_rotations 0 12800
|
| 534 |
+
conv_reg.0.message_passing_layers.0.W_t 1048576 0
|
| 535 |
+
conv_reg.0.message_passing_layers.0.b_t 1024 0
|
| 536 |
+
conv_reg.0.message_passing_layers.0.W_h 1048576 0
|
| 537 |
+
conv_reg.0.message_passing_layers.0.theta 1048576 0
|
| 538 |
+
conv_reg.0.message_passing_layers.1.W_t 1048576 0
|
| 539 |
+
conv_reg.0.message_passing_layers.1.b_t 1024 0
|
| 540 |
+
conv_reg.0.message_passing_layers.1.W_h 1048576 0
|
| 541 |
+
conv_reg.0.message_passing_layers.1.theta 1048576 0
|
| 542 |
+
conv_reg.1.layernorm1.weight 1024 0
|
| 543 |
+
conv_reg.1.layernorm1.bias 1024 0
|
| 544 |
+
conv_reg.1.ffn_dist.0.weight 131072 0
|
| 545 |
+
conv_reg.1.ffn_dist.0.bias 128 0
|
| 546 |
+
conv_reg.1.ffn_dist.2.weight 16384 0
|
| 547 |
+
conv_reg.1.ffn_dist.2.bias 128 0
|
| 548 |
+
conv_reg.1.ffn_dist.4.weight 16384 0
|
| 549 |
+
conv_reg.1.ffn_dist.4.bias 128 0
|
| 550 |
+
conv_reg.1.message_building_layer.codebook_random_rotations 0 12800
|
| 551 |
+
conv_reg.1.message_passing_layers.0.W_t 1048576 0
|
| 552 |
+
conv_reg.1.message_passing_layers.0.b_t 1024 0
|
| 553 |
+
conv_reg.1.message_passing_layers.0.W_h 1048576 0
|
| 554 |
+
conv_reg.1.message_passing_layers.0.theta 1048576 0
|
| 555 |
+
conv_reg.1.message_passing_layers.1.W_t 1048576 0
|
| 556 |
+
conv_reg.1.message_passing_layers.1.b_t 1024 0
|
| 557 |
+
conv_reg.1.message_passing_layers.1.W_h 1048576 0
|
| 558 |
+
conv_reg.1.message_passing_layers.1.theta 1048576 0
|
| 559 |
+
conv_reg.2.layernorm1.weight 1024 0
|
| 560 |
+
conv_reg.2.layernorm1.bias 1024 0
|
| 561 |
+
conv_reg.2.ffn_dist.0.weight 131072 0
|
| 562 |
+
conv_reg.2.ffn_dist.0.bias 128 0
|
| 563 |
+
conv_reg.2.ffn_dist.2.weight 16384 0
|
| 564 |
+
conv_reg.2.ffn_dist.2.bias 128 0
|
| 565 |
+
conv_reg.2.ffn_dist.4.weight 16384 0
|
| 566 |
+
conv_reg.2.ffn_dist.4.bias 128 0
|
| 567 |
+
conv_reg.2.message_building_layer.codebook_random_rotations 0 12800
|
| 568 |
+
conv_reg.2.message_passing_layers.0.W_t 1048576 0
|
| 569 |
+
conv_reg.2.message_passing_layers.0.b_t 1024 0
|
| 570 |
+
conv_reg.2.message_passing_layers.0.W_h 1048576 0
|
| 571 |
+
conv_reg.2.message_passing_layers.0.theta 1048576 0
|
| 572 |
+
conv_reg.2.message_passing_layers.1.W_t 1048576 0
|
| 573 |
+
conv_reg.2.message_passing_layers.1.b_t 1024 0
|
| 574 |
+
conv_reg.2.message_passing_layers.1.W_h 1048576 0
|
| 575 |
+
conv_reg.2.message_passing_layers.1.theta 1048576 0
|
| 576 |
+
nn_binary_particle.0.weight 1048576 0
|
| 577 |
+
nn_binary_particle.0.bias 1024 0
|
| 578 |
+
nn_binary_particle.2.weight 1024 0
|
| 579 |
+
nn_binary_particle.2.bias 1024 0
|
| 580 |
+
nn_binary_particle.4.weight 2048 0
|
| 581 |
+
nn_binary_particle.4.bias 2 0
|
| 582 |
+
nn_pid.0.weight 1048576 0
|
| 583 |
+
nn_pid.0.bias 1024 0
|
| 584 |
+
nn_pid.2.weight 1024 0
|
| 585 |
+
nn_pid.2.bias 1024 0
|
| 586 |
+
nn_pid.4.weight 6144 0
|
| 587 |
+
nn_pid.4.bias 6 0
|
| 588 |
+
nn_pu.0.weight 1048576 0
|
| 589 |
+
nn_pu.0.bias 1024 0
|
| 590 |
+
nn_pu.2.weight 1024 0
|
| 591 |
+
nn_pu.2.bias 1024 0
|
| 592 |
+
nn_pu.4.weight 1024 0
|
| 593 |
+
nn_pu.4.bias 1 0
|
| 594 |
+
nn_pt.nn.0.0.weight 1048576 0
|
| 595 |
+
nn_pt.nn.0.0.bias 1024 0
|
| 596 |
+
nn_pt.nn.0.2.weight 1024 0
|
| 597 |
+
nn_pt.nn.0.2.bias 1024 0
|
| 598 |
+
nn_pt.nn.0.4.weight 1024 0
|
| 599 |
+
nn_pt.nn.0.4.bias 1 0
|
| 600 |
+
nn_pt.nn.1.0.weight 1048576 0
|
| 601 |
+
nn_pt.nn.1.0.bias 1024 0
|
| 602 |
+
nn_pt.nn.1.2.weight 1024 0
|
| 603 |
+
nn_pt.nn.1.2.bias 1024 0
|
| 604 |
+
nn_pt.nn.1.4.weight 1024 0
|
| 605 |
+
nn_pt.nn.1.4.bias 1 0
|
| 606 |
+
nn_eta.nn.0.weight 1048576 0
|
| 607 |
+
nn_eta.nn.0.bias 1024 0
|
| 608 |
+
nn_eta.nn.2.weight 1024 0
|
| 609 |
+
nn_eta.nn.2.bias 1024 0
|
| 610 |
+
nn_eta.nn.4.weight 2048 0
|
| 611 |
+
nn_eta.nn.4.bias 2 0
|
| 612 |
+
nn_sin_phi.nn.0.weight 1048576 0
|
| 613 |
+
nn_sin_phi.nn.0.bias 1024 0
|
| 614 |
+
nn_sin_phi.nn.2.weight 1024 0
|
| 615 |
+
nn_sin_phi.nn.2.bias 1024 0
|
| 616 |
+
nn_sin_phi.nn.4.weight 2048 0
|
| 617 |
+
nn_sin_phi.nn.4.bias 2 0
|
| 618 |
+
nn_cos_phi.nn.0.weight 1048576 0
|
| 619 |
+
nn_cos_phi.nn.0.bias 1024 0
|
| 620 |
+
nn_cos_phi.nn.2.weight 1024 0
|
| 621 |
+
nn_cos_phi.nn.2.bias 1024 0
|
| 622 |
+
nn_cos_phi.nn.4.weight 2048 0
|
| 623 |
+
nn_cos_phi.nn.4.bias 2 0
|
| 624 |
+
nn_energy.nn.0.0.weight 1048576 0
|
| 625 |
+
nn_energy.nn.0.0.bias 1024 0
|
| 626 |
+
nn_energy.nn.0.2.weight 1024 0
|
| 627 |
+
nn_energy.nn.0.2.bias 1024 0
|
| 628 |
+
nn_energy.nn.0.4.weight 1024 0
|
| 629 |
+
nn_energy.nn.0.4.bias 1 0
|
| 630 |
+
nn_energy.nn.1.0.weight 1048576 0
|
| 631 |
+
nn_energy.nn.1.0.bias 1024 0
|
| 632 |
+
nn_energy.nn.1.2.weight 1024 0
|
| 633 |
+
nn_energy.nn.1.2.bias 1024 0
|
| 634 |
+
nn_energy.nn.1.4.weight 1024 0
|
| 635 |
+
nn_energy.nn.1.4.bias 1 0
|
| 636 |
+
[2025-02-04 08:16:16,483] INFO: Creating experiment dir experiments/pyg-clic_20250204_081614_352844
|
| 637 |
+
[2025-02-04 08:16:16,483] INFO: Creating experiment dir experiments/pyg-clic_20250204_081614_352844
|
| 638 |
+
[2025-02-04 08:16:16,483] INFO: [1mModel directory experiments/pyg-clic_20250204_081614_352844[0m
|
| 639 |
+
[2025-02-04 08:16:16,483] INFO: [1mModel directory experiments/pyg-clic_20250204_081614_352844[0m
|
| 640 |
+
[2025-02-04 08:16:21,637] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719492[0m
|
| 641 |
+
[2025-02-04 08:16:21,637] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719492[0m
|
| 642 |
+
[2025-02-04 08:16:21,650] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719490[0m
|
| 643 |
+
[2025-02-04 08:16:21,650] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719490[0m
|
| 644 |
+
[2025-02-04 08:16:21,662] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719489[0m
|
| 645 |
+
[2025-02-04 08:16:21,662] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719489[0m
|
| 646 |
+
[2025-02-04 08:16:21,674] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719515[0m
|
| 647 |
+
[2025-02-04 08:16:21,674] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719515[0m
|
| 648 |
+
[2025-02-04 08:16:21,688] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719510[0m
|
| 649 |
+
[2025-02-04 08:16:21,688] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719510[0m
|
| 650 |
+
[2025-02-04 08:16:21,700] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719503[0m
|
| 651 |
+
[2025-02-04 08:16:21,700] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719503[0m
|
| 652 |
+
[2025-02-04 08:16:21,712] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719509[0m
|
| 653 |
+
[2025-02-04 08:16:21,712] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719509[0m
|
| 654 |
+
[2025-02-04 08:16:21,724] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719484[0m
|
| 655 |
+
[2025-02-04 08:16:21,724] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719484[0m
|
| 656 |
+
[2025-02-04 08:16:21,736] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719474[0m
|
| 657 |
+
[2025-02-04 08:16:21,736] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719474[0m
|
| 658 |
+
[2025-02-04 08:16:21,748] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 720386[0m
|
| 659 |
+
[2025-02-04 08:16:21,748] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 720386[0m
|
| 660 |
+
[2025-02-04 08:16:21,767] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 661 |
+
[2025-02-04 08:16:21,767] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 662 |
+
[2025-02-04 08:16:21,786] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 663 |
+
[2025-02-04 08:16:21,786] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 664 |
+
[2025-02-04 08:16:21,807] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 665 |
+
[2025-02-04 08:16:21,807] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 666 |
+
[2025-02-04 08:16:21,827] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 667 |
+
[2025-02-04 08:16:21,827] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 668 |
+
[2025-02-04 08:16:21,846] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 669 |
+
[2025-02-04 08:16:21,846] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 670 |
+
[2025-02-04 08:16:21,865] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 671 |
+
[2025-02-04 08:16:21,865] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 672 |
+
[2025-02-04 08:16:21,884] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 673 |
+
[2025-02-04 08:16:21,884] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 674 |
+
[2025-02-04 08:16:21,903] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 675 |
+
[2025-02-04 08:16:21,903] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 676 |
+
[2025-02-04 08:16:21,925] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 677 |
+
[2025-02-04 08:16:21,925] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m
|
| 678 |
+
[2025-02-04 08:16:21,944] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 714700[0m
|
| 679 |
+
[2025-02-04 08:16:21,944] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 714700[0m
|
| 680 |
+
[2025-02-04 08:16:21,956] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 681 |
+
[2025-02-04 08:16:21,956] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 682 |
+
[2025-02-04 08:16:21,968] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 683 |
+
[2025-02-04 08:16:21,968] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 684 |
+
[2025-02-04 08:16:21,980] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 685 |
+
[2025-02-04 08:16:21,980] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 686 |
+
[2025-02-04 08:16:21,992] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 687 |
+
[2025-02-04 08:16:21,992] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 688 |
+
[2025-02-04 08:16:22,005] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 689 |
+
[2025-02-04 08:16:22,005] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 690 |
+
[2025-02-04 08:16:22,016] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 691 |
+
[2025-02-04 08:16:22,016] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 692 |
+
[2025-02-04 08:16:22,028] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 693 |
+
[2025-02-04 08:16:22,028] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 694 |
+
[2025-02-04 08:16:22,040] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 695 |
+
[2025-02-04 08:16:22,040] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 696 |
+
[2025-02-04 08:16:22,053] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 697 |
+
[2025-02-04 08:16:22,053] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m
|
| 698 |
+
[2025-02-04 08:16:22,065] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720700[0m
|
| 699 |
+
[2025-02-04 08:16:22,065] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720700[0m
|
| 700 |
+
[2025-02-04 08:16:23,297] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79948[0m
|
| 701 |
+
[2025-02-04 08:16:23,297] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79948[0m
|
| 702 |
+
[2025-02-04 08:16:23,302] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m
|
| 703 |
+
[2025-02-04 08:16:23,302] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m
|
| 704 |
+
[2025-02-04 08:16:23,308] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m
|
| 705 |
+
[2025-02-04 08:16:23,308] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m
|
| 706 |
+
[2025-02-04 08:16:23,313] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m
|
| 707 |
+
[2025-02-04 08:16:23,313] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m
|
| 708 |
+
[2025-02-04 08:16:23,318] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m
|
| 709 |
+
[2025-02-04 08:16:23,318] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m
|
| 710 |
+
[2025-02-04 08:16:23,323] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m
|
| 711 |
+
[2025-02-04 08:16:23,323] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m
|
| 712 |
+
[2025-02-04 08:16:23,327] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79938[0m
|
| 713 |
+
[2025-02-04 08:16:23,327] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79938[0m
|
| 714 |
+
[2025-02-04 08:16:23,332] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79957[0m
|
| 715 |
+
[2025-02-04 08:16:23,332] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79957[0m
|
| 716 |
+
[2025-02-04 08:16:23,337] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79955[0m
|
| 717 |
+
[2025-02-04 08:16:23,337] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79955[0m
|
| 718 |
+
[2025-02-04 08:16:23,342] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 80035[0m
|
| 719 |
+
[2025-02-04 08:16:23,342] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 80035[0m
|
| 720 |
+
[2025-02-04 08:16:23,348] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 721 |
+
[2025-02-04 08:16:23,348] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 722 |
+
[2025-02-04 08:16:23,355] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 723 |
+
[2025-02-04 08:16:23,355] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 724 |
+
[2025-02-04 08:16:23,362] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 725 |
+
[2025-02-04 08:16:23,362] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 726 |
+
[2025-02-04 08:16:23,502] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 727 |
+
[2025-02-04 08:16:23,502] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 728 |
+
[2025-02-04 08:16:23,511] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 729 |
+
[2025-02-04 08:16:23,511] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 730 |
+
[2025-02-04 08:16:23,518] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 731 |
+
[2025-02-04 08:16:23,518] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 732 |
+
[2025-02-04 08:16:23,525] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 733 |
+
[2025-02-04 08:16:23,525] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 734 |
+
[2025-02-04 08:16:23,533] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 735 |
+
[2025-02-04 08:16:23,533] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 736 |
+
[2025-02-04 08:16:23,539] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 737 |
+
[2025-02-04 08:16:23,539] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m
|
| 738 |
+
[2025-02-04 08:16:23,545] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79700[0m
|
| 739 |
+
[2025-02-04 08:16:23,545] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79700[0m
|
| 740 |
+
[2025-02-04 08:16:23,552] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 741 |
+
[2025-02-04 08:16:23,552] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 742 |
+
[2025-02-04 08:16:23,558] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 743 |
+
[2025-02-04 08:16:23,558] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 744 |
+
[2025-02-04 08:16:23,565] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 745 |
+
[2025-02-04 08:16:23,565] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 746 |
+
[2025-02-04 08:16:23,570] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 747 |
+
[2025-02-04 08:16:23,570] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 748 |
+
[2025-02-04 08:16:23,575] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 749 |
+
[2025-02-04 08:16:23,575] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 750 |
+
[2025-02-04 08:16:23,581] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 751 |
+
[2025-02-04 08:16:23,581] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 752 |
+
[2025-02-04 08:16:23,586] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 753 |
+
[2025-02-04 08:16:23,586] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 754 |
+
[2025-02-04 08:16:23,592] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 755 |
+
[2025-02-04 08:16:23,592] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 756 |
+
[2025-02-04 08:16:23,597] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 757 |
+
[2025-02-04 08:16:23,597] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m
|
| 758 |
+
[2025-02-04 08:16:23,603] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80100[0m
|
| 759 |
+
[2025-02-04 08:16:23,603] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80100[0m
|
| 760 |
+
[2025-02-06 06:49:31,898] INFO: [1mRank 0: epoch=1/10 train_loss=5.1229 valid_loss=4.8985 stale=0 epoch_train_time=2768.98m epoch_valid_time=24.13m epoch_total_time=2793.11m eta=25138.2m[0m
|
| 761 |
+
[2025-02-06 06:49:31,898] INFO: [1mRank 0: epoch=1/10 train_loss=5.1229 valid_loss=4.8985 stale=0 epoch_train_time=2768.98m epoch_valid_time=24.13m epoch_total_time=2793.11m eta=25138.2m[0m
|
| 762 |
+
[2025-02-06 06:49:31,908] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
| 763 |
+
[2025-02-06 06:49:31,908] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
| 764 |
+
[2025-02-06 06:49:32,016] INFO: [0;34mtest_dataset: clic_edm_qq_pf, 2000[0m
|
| 765 |
+
[2025-02-06 06:49:32,016] INFO: [0;34mtest_dataset: clic_edm_qq_pf, 2000[0m
|
| 766 |
+
[2025-02-06 06:49:32,027] INFO: Running predictions on clic_edm_qq_pf
|
| 767 |
+
[2025-02-06 06:49:32,027] INFO: Running predictions on clic_edm_qq_pf
|
| 768 |
+
[2025-02-06 06:49:35,361] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_0.parquet
|
| 769 |
+
[2025-02-06 06:49:35,361] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_0.parquet
|
| 770 |
+
[2025-02-06 06:49:35,858] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_1.parquet
|
| 771 |
+
[2025-02-06 06:49:35,858] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_1.parquet
|
| 772 |
+
[2025-02-06 06:49:36,326] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_2.parquet
|
| 773 |
+
[2025-02-06 06:49:36,326] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_2.parquet
|
| 774 |
+
[2025-02-06 06:49:36,796] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_3.parquet
|
| 775 |
+
[2025-02-06 06:49:36,796] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_3.parquet
|
| 776 |
+
[2025-02-06 06:49:37,321] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_4.parquet
|
| 777 |
+
[2025-02-06 06:49:37,321] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_4.parquet
|
| 778 |
+
[2025-02-06 06:49:37,832] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_5.parquet
|
| 779 |
+
[2025-02-06 06:49:37,832] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_5.parquet
|
| 780 |
+
[2025-02-06 06:49:38,305] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_6.parquet
|
| 781 |
+
[2025-02-06 06:49:38,305] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_6.parquet
|
| 782 |
+
[2025-02-06 06:49:38,752] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_7.parquet
|
| 783 |
+
[2025-02-06 06:49:38,752] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_1/clic_edm_qq_pf/pred_0_7.parquet
|
| 784 |
+
[2025-02-06 06:49:38,828] INFO: Time taken to make predictions on device 0 is: 0.11 min
|
| 785 |
+
[2025-02-06 06:49:38,828] INFO: Time taken to make predictions on device 0 is: 0.11 min
|
| 786 |
+
[2025-02-08 05:24:19,735] INFO: [1mRank 0: epoch=2/10 train_loss=4.8377 valid_loss=5.2062 stale=1 epoch_train_time=2769.77m epoch_valid_time=24.22m epoch_total_time=2793.99m eta=22351.7m[0m
|
| 787 |
+
[2025-02-08 05:24:19,735] INFO: [1mRank 0: epoch=2/10 train_loss=4.8377 valid_loss=5.2062 stale=1 epoch_train_time=2769.77m epoch_valid_time=24.22m epoch_total_time=2793.99m eta=22351.7m[0m
|
| 788 |
+
[2025-02-08 05:24:19,746] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
| 789 |
+
[2025-02-08 05:24:19,746] INFO: split_configs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
| 790 |
+
[2025-02-08 05:24:19,848] INFO: [0;34mtest_dataset: clic_edm_qq_pf, 2000[0m
|
| 791 |
+
[2025-02-08 05:24:19,848] INFO: [0;34mtest_dataset: clic_edm_qq_pf, 2000[0m
|
| 792 |
+
[2025-02-08 05:24:19,852] INFO: Running predictions on clic_edm_qq_pf
|
| 793 |
+
[2025-02-08 05:24:19,852] INFO: Running predictions on clic_edm_qq_pf
|
| 794 |
+
[2025-02-08 05:24:21,022] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet
|
| 795 |
+
[2025-02-08 05:24:21,022] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_0.parquet
|
| 796 |
+
[2025-02-08 05:24:21,482] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet
|
| 797 |
+
[2025-02-08 05:24:21,482] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_1.parquet
|
| 798 |
+
[2025-02-08 05:24:21,958] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet
|
| 799 |
+
[2025-02-08 05:24:21,958] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_2.parquet
|
| 800 |
+
[2025-02-08 05:24:22,535] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet
|
| 801 |
+
[2025-02-08 05:24:22,535] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_3.parquet
|
| 802 |
+
[2025-02-08 05:24:23,071] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet
|
| 803 |
+
[2025-02-08 05:24:23,071] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_4.parquet
|
| 804 |
+
[2025-02-08 05:24:23,545] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet
|
| 805 |
+
[2025-02-08 05:24:23,545] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_5.parquet
|
| 806 |
+
[2025-02-08 05:24:24,031] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet
|
| 807 |
+
[2025-02-08 05:24:24,031] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_6.parquet
|
| 808 |
+
[2025-02-08 05:24:24,463] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet
|
| 809 |
+
[2025-02-08 05:24:24,463] INFO: Saved predictions at experiments/pyg-clic_20250204_081614_352844/preds_epoch_2/clic_edm_qq_pf/pred_0_7.parquet
|
| 810 |
+
[2025-02-08 05:24:24,538] INFO: Time taken to make predictions on device 0 is: 0.07 min
|
| 811 |
+
[2025-02-08 05:24:24,538] INFO: Time taken to make predictions on device 0 is: 0.07 min
|