Commit 51c1747 · 1 Parent(s): 2388659
Update app.py
Browse files
app.py
CHANGED
|
@@ -34,12 +34,12 @@ print(f"Starting to load the model {model_name} into memory")
|
|
| 34 |
|
| 35 |
m = AutoModelForCausalLM.from_pretrained(
|
| 36 |
model_name,
|
| 37 |
-
quantization_config=transformers.BitsAndBytesConfig(
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
torch_dtype=torch.bfloat16,
|
| 44 |
device_map={"": 0}
|
| 45 |
)
|
|
|
|
| 34 |
|
| 35 |
m = AutoModelForCausalLM.from_pretrained(
|
| 36 |
model_name,
|
| 37 |
+
#quantization_config=transformers.BitsAndBytesConfig(
|
| 38 |
+
# load_in_4bit=True,
|
| 39 |
+
# bnb_4bit_compute_dtype=torch.bfloat16,
|
| 40 |
+
# bnb_4bit_use_double_quant=True,
|
| 41 |
+
# bnb_4bit_quant_type='nf4' # {'fp4', 'nf4'}
|
| 42 |
+
# ),
|
| 43 |
torch_dtype=torch.bfloat16,
|
| 44 |
device_map={"": 0}
|
| 45 |
)
|