import gradio as gr
from llama_cpp import Llama
import os

# 🔽 Download a smaller GGUF model (Q4_0 quant for a faster load)
model_url = "https://huggingface.co/TheBloke/Meditron-7B-GGUF/resolve/main/meditron-7b.Q4_0.gguf"
model_path = "meditron-7b.Q4_0.gguf"

if not os.path.exists(model_path):
    os.system(f"wget -O {model_path} {model_url}")
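# Alternative download (a sketch, not part of the original Space): the os.system/wget
# call above assumes a wget binary is present and ignores failures. huggingface_hub's
# hf_hub_download is a pure-Python alternative that also caches and resumes downloads;
# uncomment to use it instead of wget.
# from huggingface_hub import hf_hub_download
# model_path = hf_hub_download(
#     repo_id="TheBloke/Meditron-7B-GGUF",
#     filename="meditron-7b.Q4_0.gguf",
# )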
# ⚙️ Load model with GPU acceleration (T4-optimized), falling back to CPU
try:
    llm = Llama(model_path=model_path, n_gpu_layers=28, n_ctx=2048, verbose=False)
    llm("Hello", max_tokens=1)  # 🔥 Warm-up call to reduce first-request latency
    backend = "GPU"
except Exception:
    llm = Llama(model_path=model_path, n_ctx=2048)
    backend = "CPU"
# 🧠 Diagnosis function
def diagnose(symptoms):
    if not symptoms.strip():
        return "⚠️ Please enter symptoms to receive a diagnosis."
    prompt = f"""You are a cautious and knowledgeable medical diagnosis assistant. You do not provide definitive diagnoses, only possible conditions and recommended next steps. Always advise users to consult a licensed physician.
Symptoms: {symptoms}
Diagnosis:"""
    try:
        output = llm(prompt, max_tokens=512, stop=["User:", "\n\n"])
        return output["choices"][0]["text"].strip()
    except Exception as e:
        return f"⚠️ Model error: {str(e)}"
# 🎨 Gradio UI
gr.Interface(
    fn=diagnose,
    inputs=gr.Textbox(lines=5, placeholder="e.g. fever, cough, fatigue..."),
    outputs="text",
    title="🩺 Medical Diagnosis Chatbot",
    description=f"Enter symptoms to get possible diagnoses. Powered by Meditron-7B ({backend} mode).",
).launch()
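# Quick smoke test (a sketch, not in the original Space): `diagnose` can be called
# directly once the model has loaded, which is handy before wiring up the UI. Note
# that launch() blocks, so run these lines before it or in a separate REPL session.
# print(diagnose("fever, dry cough, fatigue for three days"))  # model completion
# print(diagnose("   "))                                       # empty-input warning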