Spaces:

VLAI-AIVN
/

AIO2024M10_Tutorial_Tool_Calling

Running

wjnwjn59 committed on May 11

Commit

7cc25b5

1 Parent(s): 0f47a3e

change temperature

Files changed (2) hide show

app.py CHANGED Viewed

@@ -49,7 +49,6 @@ def inference(pil_img, prompt, task, temperature):
         except Exception:
             pass   # if deletion fails we just move on
-# ────────────────────────────  UI  ────────────────────────────
 def create_header():
     with gr.Row():
         with gr.Column(scale=1):

         except Exception:
             pass   # if deletion fails we just move on
 def create_header():
     with gr.Row():
         with gr.Column(scale=1):

src/llm/chat.py CHANGED Viewed

@@ -18,11 +18,12 @@ Here is a list of functions in JSON format that you can invoke.\n\n{functions}\n
 device = "cuda" if torch.cuda.is_available() else "cpu"
 class FunctionCallingChat:
-    def __init__(self, model_id: str = "meta-llama/Llama-3.2-1B-Instruct"):
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.model     = AutoModelForCausalLM.from_pretrained(
             model_id, device_map=device, torch_dtype=torch.bfloat16
         )
     def __call__(self, user_msg: str) -> dict:
         messages = [
@@ -31,7 +32,7 @@ class FunctionCallingChat:
         ]
         generation_cfg = GenerationConfig(
-            max_new_tokens=128, temperature=0.2, top_p=0.95, do_sample=True
         )
         tokenized = self.tokenizer.apply_chat_template(

 device = "cuda" if torch.cuda.is_available() else "cpu"
 class FunctionCallingChat:
+    def __init__(self, model_id: str = "meta-llama/Llama-3.2-1B-Instruct", temperature: float = 0.7):
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.model     = AutoModelForCausalLM.from_pretrained(
             model_id, device_map=device, torch_dtype=torch.bfloat16
         )
+        self.temperature = temperature
     def __call__(self, user_msg: str) -> dict:
         messages = [
         ]
         generation_cfg = GenerationConfig(
+            max_new_tokens=128, temperature=self.temperature, top_p=0.95, do_sample=True
         )
         tokenized = self.tokenizer.apply_chat_template(