mahesh1209 committed
Commit fc871ce · verified · 1 parent: f07d740

Update app.py

Files changed (1): app.py (+7 -6)
app.py CHANGED
@@ -1,16 +1,18 @@
+
 import gradio as gr
 from llama_cpp import Llama
 import os
 
-# 🔽 Download GGUF model if not present
-model_url = "https://huggingface.co/TheBloke/Meditron-7B-GGUF/resolve/main/meditron-7b.Q4_K_M.gguf"
-model_path = "meditron-7b.Q4_K_M.gguf"
+# 🔽 Download smaller GGUF model (Q4_0 for faster load)
+model_url = "https://huggingface.co/TheBloke/Meditron-7B-GGUF/resolve/main/meditron-7b.Q4_0.gguf"
+model_path = "meditron-7b.Q4_0.gguf"
 if not os.path.exists(model_path):
     os.system(f"wget -O {model_path} {model_url}")
 
-# ⚙️ Load model with GPU acceleration (T4 sweet spot)
+# ⚙️ Load model with GPU acceleration (T4 optimized)
 try:
-    llm = Llama(model_path=model_path, n_gpu_layers=35, n_ctx=2048, verbose=False)
+    llm = Llama(model_path=model_path, n_gpu_layers=28, n_ctx=2048, verbose=False)
+    llm("Hello", max_tokens=1)  # 🔥 Warmup to reduce latency
     backend = "GPU"
 except Exception:
     llm = Llama(model_path=model_path, n_ctx=2048)
@@ -39,4 +41,3 @@ gr.Interface(
     title="🩺 Medical Diagnosis Chatbot",
     description=f"Enter symptoms to get possible diagnoses. Powered by Meditron-7B ({backend} mode)."
 ).launch()
-
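
For reference, a minimal standalone sketch of the loading pattern this commit lands, assuming llama-cpp-python is built with CUDA support and huggingface_hub is installed. hf_hub_download is swapped in here for the wget call as a more robust download path; it is not part of the committed code, and the backend = "CPU" line in the fallback branch sits outside the diff's context and is assumed.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the Q4_0 GGUF from the Hub (cached locally across runs).
model_path = hf_hub_download(
    repo_id="TheBloke/Meditron-7B-GGUF",
    filename="meditron-7b.Q4_0.gguf",
)

try:
    # Offload 28 transformer layers to the GPU; a 7B Q4_0 model at this
    # setting leaves some VRAM headroom on a 16 GB T4.
    llm = Llama(model_path=model_path, n_gpu_layers=28, n_ctx=2048, verbose=False)
    llm("Hello", max_tokens=1)  # warmup: pay the first-call setup cost up front
    backend = "GPU"
except Exception:
    # CPU-only fallback if CUDA offload is unavailable (assumed branch).
    llm = Llama(model_path=model_path, n_ctx=2048)
    backend = "CPU"

Dropping n_gpu_layers from 35 to 28 trades some generation speed for VRAM headroom; the right value depends on the card and context size, so treat 28 as a starting point rather than a fixed constant.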