test_stage:
  obcq_modifiers:
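    # SmoothQuant migrates activation outliers into the preceding weights so
    # the activations become easier to quantize; smoothing_strength (alpha)
    # controls how much of the difficulty is shifted (0.5 = an even split).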
    SmoothQuantModifier:
      smoothing_strength: 0.5
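      # Each mapping pairs the projections to smooth with the norm whose
      # output feeds them: q/k/v projections follow input_layernorm, and the
      # MLP gate/up projections follow post_attention_layernorm.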
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      ]
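    # 8-bit quantization of the remaining modules; anything listed under
    # `ignore` is left in full precision.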
    QuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - QuantizableMatMul
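        # Specific down_proj layers kept in full precision; in this recipe
        # they were presumably found to be especially quantization-sensitive.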
        - model.layers.21.mlp.down_proj
        - model.layers.7.mlp.down_proj
        - model.layers.2.mlp.down_proj
        - model.layers.20.mlp.down_proj
        - model.layers.19.mlp.down_proj
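      # Re-run calibration after the one-shot pass so the quantization
      # observers see the final (pruned) weights.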
      post_oneshot_calibration: true
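      # Override the default scheme for embeddings: 8-bit asymmetric weights,
      # inputs left unquantized.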
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
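    # SparseGPT one-shot pruning: 50% unstructured sparsity ("0:0" = no N:M
    # mask structure), applied one decoder layer at a time (sequential_update)
    # with 1% Hessian dampening (percdamp) and 128-column blockwise weight
    # updates; quantize: true applies the quantization configured above in
    # the same pass. targets matches every model.layers.<N> decoder block.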
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      mask_structure: "0:0"
      targets: ["re:model.layers.\\d*$"]
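A recipe like this is consumed by SparseML's one-shot (OBCQ) entrypoint; the exact invocation differs across releases, so check the docs for your installed version. As a quick, library-agnostic sanity check, the sketch below (assuming the recipe text is saved as `recipe.yaml`, a hypothetical path) parses the file with PyYAML and spot-checks the values the pruning and quantization steps depend on.

```python
# Minimal sanity check for the recipe above (a sketch; "recipe.yaml" is an
# assumed filename, and PyYAML must be installed: pip install pyyaml).
import yaml

with open("recipe.yaml") as f:
    recipe = yaml.safe_load(f)

modifiers = recipe["test_stage"]["obcq_modifiers"]

# List each configured modifier and its hyperparameters.
for name, config in modifiers.items():
    print(name)
    for key, value in config.items():
        print(f"  {key}: {value}")

# Spot-check values the one-shot pass depends on before launching a long run.
assert 0.0 <= modifiers["SmoothQuantModifier"]["smoothing_strength"] <= 1.0
assert 0.0 < modifiers["SparseGPTModifier"]["sparsity"] < 1.0
assert modifiers["SparseGPTModifier"]["mask_structure"] == "0:0"
```

Catching a mistyped modifier name or an out-of-range sparsity here is much cheaper than discovering it partway through a one-shot pass over a full calibration set.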