app.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
|
| 4 |
# Pull down the quantized checkpoint and its tokenizer once at start-up.
# NOTE(review): device_map="auto" requires `accelerate`, and a GPTQ repo
# presumably needs `optimum`/GPTQ support installed — confirm for your hardware.
tokenizer = AutoTokenizer.from_pretrained("TheBloke/Chronoboros-33B-GPTQ")
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Chronoboros-33B-GPTQ",
    device_map="auto",
)
|
| 7 |
|
|
|
|
| 8 |
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
|
| 9 |
# Build the prompt using conversation history
|
| 10 |
prompt = f"{system_message}\n"
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import spaces
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 4 |
|
| 5 |
# Download the tokenizer and the quantized model at import time so the
# Gradio handler can reuse them across requests.
# NOTE(review): device_map="auto" depends on `accelerate`; adjust as needed.
_REPO_ID = "TheBloke/Chronoboros-33B-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(_REPO_ID)
model = AutoModelForCausalLM.from_pretrained(_REPO_ID, device_map="auto")
|
| 8 |
|
| 9 | |
| 10 |
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
|
| 11 |
# Build the prompt using conversation history
|
| 12 |
prompt = f"{system_message}\n"
|