import gradio as gr
from llama_cpp import Llama
import os

# 🔽 Download smaller GGUF model (Q4_0 quantization for a faster load)
model_url = "https://huggingface.co/TheBloke/Meditron-7B-GGUF/resolve/main/meditron-7b.Q4_0.gguf"
model_path = "meditron-7b.Q4_0.gguf"

if not os.path.exists(model_path):
    # wget assumes a Linux-like environment (e.g. Colab or a Hugging Face Space)
    exit_code = os.system(f"wget -O {model_path} {model_url}")
    if exit_code != 0:
        raise RuntimeError(f"Failed to download model from {model_url}")

# ⚙️ Load model with GPU acceleration (T4 optimized)
# Note: a CPU-only build of llama-cpp-python silently ignores n_gpu_layers
# rather than raising, so this fallback only catches hard load errors.
try:
    llm = Llama(model_path=model_path, n_gpu_layers=28, n_ctx=2048, verbose=False)
    llm("Hello", max_tokens=1)  # 🔥 Warmup call to reduce first-request latency
    backend = "GPU"
except Exception:
    llm = Llama(model_path=model_path, n_ctx=2048)
    backend = "CPU"

# 🧠 Diagnosis function
def diagnose(symptoms):
    if not symptoms.strip():
        return "⚠️ Please enter symptoms to receive a diagnosis."

    prompt = f"""You are a cautious and knowledgeable medical diagnosis assistant.
You do not provide definitive diagnoses, only possible conditions and recommended next steps.
Always advise users to consult a licensed physician.

Symptoms: {symptoms}

Diagnosis:"""

    try:
        # "\n\n" in the stop list ends generation at the first blank line,
        # keeping answers to a single paragraph
        output = llm(prompt, max_tokens=512, stop=["User:", "\n\n"])
        return output["choices"][0]["text"].strip()
    except Exception as e:
        return f"⚠️ Model error: {str(e)}"

# 🎨 Gradio UI
gr.Interface(
    fn=diagnose,
    inputs=gr.Textbox(lines=5, placeholder="e.g. fever, cough, fatigue..."),
    outputs="text",
    title="🩺 Medical Diagnosis Chatbot",
    description=f"Enter symptoms to get possible diagnoses. Powered by Meditron-7B ({backend} mode).",
).launch()
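
# Usage sketch (assumed environment: a CUDA-capable machine such as a Colab
# T4; the filename app.py is an assumption, not part of the original script):
#   pip install gradio llama-cpp-python
#   python app.py
# Gradio then serves the UI on a local URL; pass share=True to launch()
# for a temporary public link.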