Training in progress, step 19

Files changed (6) hide show

adapter_config.json CHANGED Viewed

@@ -3,6 +3,9 @@
   "auto_mapping": null,
   "base_model_name_or_path": "bigscience/bloom-560m",
   "bias": "none",
   "fan_in_fan_out": null,
   "inference_mode": true,
   "init_lora_weights": true,
@@ -10,22 +13,27 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "dense",
     "dense_h_to_4h",
-    "dense_4h_to_h",
-    "query_key_value"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false
 }

   "auto_mapping": null,
   "base_model_name_or_path": "bigscience/bloom-560m",
   "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
   "fan_in_fan_out": null,
   "inference_mode": true,
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "dense_h_to_4h",
+    "query_key_value",
+    "dense",
+    "dense_4h_to_h"
   ],
+  "target_parameters": [],
   "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:668d2d9d08add152990ebf5a6b30e8555466a3aa8aa0fc89105b820786514ab8
-size 12609416

 version https://git-lfs.github.com/spec/v1
+oid sha256:b26ef208f28416a0712890337e4681d8a57f168195040a421fe8104533e06bc2
+size 50358592

chat_template.jinja ADDED Viewed

+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>
+'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>
+' }}{% endif %}

config.json CHANGED Viewed

@@ -1,6 +1,4 @@
 {
-  "_attn_implementation_autoset": true,
-  "_name_or_path": "bigscience/bloom-560m",
   "apply_residual_connection_post_layernorm": false,
   "architectures": [
     "BloomForCausalLM"
@@ -25,7 +23,8 @@
   "skip_bias_add": true,
   "skip_bias_add_qkv": false,
   "slow_but_exact": false,
-  "transformers_version": "4.46.0",
   "unk_token_id": 0,
   "use_cache": false,
   "vocab_size": 250880

 {
   "apply_residual_connection_post_layernorm": false,
   "architectures": [
     "BloomForCausalLM"
   "skip_bias_add": true,
   "skip_bias_add_qkv": false,
   "slow_but_exact": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.0",
   "unk_token_id": 0,
   "use_cache": false,
   "vocab_size": 250880

tokenizer_config.json CHANGED Viewed

@@ -35,9 +35,9 @@
     }
   },
   "bos_token": "<s>",
-  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "merges_file": null,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",

     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "extra_special_tokens": {},
   "merges_file": null,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3afe0117788319fb4249f549c46803458c1cdd206c0549bdaaf9b932eae77271
-size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2b279f4dc88c9bef838fd287ae00cc55d02565da6fc52cbcdd320370823a3f4
+size 7352