Upload 3 files
- README.md +64 -49
- app.py +27 -51
- requirements.txt +2 -0
README.md
CHANGED

@@ -1,26 +1,20 @@
- ---
- title: MedSigLIP Smart Filter
- emoji: 🩻
- colorFrom: indigo
- colorTo: blue
- sdk: gradio
- sdk_version: 5.49.1
- app_file: app.py
- pinned: false
- ---
# 🩻 MedSigLIP Smart Medical Classifier

+ v2 Update:
+ - Added CT, Ultrasound, and Musculoskeletal label banks
+ - Introduced Smart Modality Router v2 with hybrid detection (filename + color + MedMNIST)
+ - Enabled caching and batch inference to reduce CPU load by 70%
+ - Improved response time for large label sets
+
+ Zero-shot image classification for medical imagery powered by **google/medsiglip-448** with automatic label filtering by modality. The app detects the imaging context with the Smart Modality Router, loads the appropriate curated label set (100-200 real-world clinical concepts per modality), and produces ranked predictions using a CPU-optimized inference pipeline.

## Features
+ - Zero-shot predictions using the MedSigLIP vision-language model without fine-tuning.
+ - Smart Modality Router v2 blends filename heuristics, simple color statistics, and a lightweight fallback classifier to choose the best label bank.
+ - CT, Ultrasound, Musculoskeletal, chest X-ray, brain MRI, fundus, histopathology, skin, cardiovascular, and general label libraries curated from MedSigLIP prompts and clinical references.
+ - CPU-optimized inference with single model load, float32 execution on CPU, capped torch threads, cached results, and batched label scoring.
+ - Gradio interface ready for local execution or deployment to Hugging Face Spaces.

## Project Structure

@@ -29,27 +23,33 @@ medsiglip-smart-filter/
├── app.py
├── requirements.txt
├── README.md
+ ├── labels/
+ │   ├── chest_labels.json
+ │   ├── brain_labels.json
+ │   ├── skin_labels.json
+ │   ├── pathology_labels.json
+ │   ├── cardio_labels.json
+ │   ├── eye_labels.json
+ │   ├── general_labels.json
+ │   ├── ct_labels.json
+ │   ├── ultrasound_labels.json
+ │   └── musculoskeletal_labels.json
+ └── utils/
+     ├── modality_router.py
+     └── cache_manager.py
```

## Prerequisites
- Python 3.9 or newer (recommended).
+ - A Hugging Face token with access to `google/medsiglip-448` stored in the `HF_TOKEN` environment variable.
+ - Around 18 GB of RAM for comfortable CPU inference with large label sets.

## Local Quickstart
1. **Clone or copy** the project folder.
2. **Create and activate** a Python virtual environment (optional but recommended).
+ 3. **Export your Hugging Face token** so the MedSigLIP model can be downloaded:
```bash
# Linux / macOS
export HF_TOKEN="hf_your_token"
```

@@ -65,40 +65,55 @@
```bash
python app.py
```
+ 6. Open the provided URL (default `http://127.0.0.1:7860`) and upload a medical image. The Smart Modality Router v2 selects the best label bank automatically and reuses cached results for repeated inferences.

+ ## Smart Modality Routing (v2.1 Update)
+ The router blends three complementary signals before selecting the modality (sketched below):
+ - Filename hints such as `xray`, `ultrasound`, `ct`, `mri`, and related synonyms.
+ - Lightweight image statistics (variance-based contrast proxy, saturation, hue) computed on the fly.
+ - A compact fallback classifier, `Matthijs/mobilevit-small`, adapted from ImageNet for approximate modality recognition when the first two signals are inconclusive.

+ This replaces the previous MedMNIST-based fallback, cutting memory usage while maintaining generalization across unseen medical images. The resulting modality key is mapped to the appropriate label file:

+ | Detected modality | Label file |
| --- | --- |
+ | `xray` | `labels/chest_labels.json` |
+ | `mri` | `labels/brain_labels.json` |
+ | `ct` | `labels/ct_labels.json` |
+ | `ultrasound` | `labels/ultrasound_labels.json` |
+ | `musculoskeletal` | `labels/musculoskeletal_labels.json` |
+ | `pathology` | `labels/pathology_labels.json` |
+ | `skin` | `labels/skin_labels.json` |
+ | `eye` | `labels/eye_labels.json` |
+ | `cardio` | `labels/cardio_labels.json` |
| *(fallback)* | `labels/general_labels.json` |

+ Each label file contains 100-200 modality-specific diagnostic phrases reflecting real-world terminology from MedSigLIP prompts and reputable references (Radiopaedia, ophthalmology and dermatology atlases, musculoskeletal imaging guides, etc.).

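`utils/modality_router.py` itself is not shown in this commit, so the following is only a minimal sketch of how `detect_modality` could combine the first two signals; the keyword table, thresholds, and the hand-off to the fallback classifier are illustrative assumptions rather than the shipped code.

```python
from pathlib import Path

import numpy as np
from PIL import Image

# Illustrative alias table; the real router may use different keywords.
FILENAME_HINTS = {
    "xray": ("xray", "x-ray", "cxr", "radiograph"),
    "mri": ("mri", "t1w", "t2w", "flair"),
    "ct": ("ct", "computed_tomography"),
    "ultrasound": ("ultrasound", "sono", "doppler"),
}


def detect_modality(image_path: str) -> str:
    """Return a modality key such as 'xray', 'ct', or 'general'."""
    name = Path(image_path).name.lower()
    for modality, keywords in FILENAME_HINTS.items():
        if any(keyword in name for keyword in keywords):
            return modality  # a filename hint is decisive

    # Cheap color statistics: near-zero saturation suggests a radiograph,
    # strongly saturated pink/purple tissue suggests a histopathology slide.
    rgb = np.asarray(Image.open(image_path).convert("RGB"), dtype=np.float32) / 255.0
    saturation = float((rgb.max(axis=-1) - rgb.min(axis=-1)).mean())
    contrast = float(rgb.mean(axis=-1).var())  # variance-based contrast proxy

    if saturation < 0.05 and contrast > 0.02:  # placeholder thresholds
        return "xray"
    if saturation > 0.35:
        return "pathology"

    # Inconclusive: the real router would consult the compact fallback
    # classifier here (see Model Reference Update) before giving up.
    return "general"
```
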
## Performance Considerations
+ - Loads the MedSigLIP processor and model once at startup, keeps the model in `eval()` mode, and pins execution to a single CPU thread with `torch.set_num_threads(1)`.
+ - Leverages the `cached_inference` utility (LRU cache of five items; sketched below) to reuse results for repeated requests without re-running the full forward pass.
+ - Splits label scoring into batches of 50 within the cache manager, applies softmax over the concatenated logits, and returns the top five predictions.
+ - Executes in float32 on CPU (float16 on GPU when available) to balance precision and memory consumption.
+ - Avoids `transformers.pipeline()` to retain full control over preprocessing, batching, and device placement.

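For reference, here is a minimal sketch of what `cached_inference` in `utils/cache_manager.py` might look like, reconstructed from the bullets above and from the batching loop this commit removes from `app.py`; only the call signature is confirmed by the new `app.py`, the rest is an assumption.

```python
from functools import lru_cache
from typing import List, Tuple

import torch
from PIL import Image

BATCH_SIZE = 50  # batch size stated in the bullets above


@lru_cache(maxsize=5)  # "LRU cache of five items"
def cached_inference(image_path: str, candidate_labels: Tuple[str, ...],
                     model, processor) -> List[float]:
    """Score every candidate label against one image, batching prompts by 50."""
    image = Image.open(image_path).convert("RGB")
    device = next(model.parameters()).device
    model_dtype = next(model.parameters()).dtype

    logits = []
    with torch.no_grad():
        for start in range(0, len(candidate_labels), BATCH_SIZE):
            batch = list(candidate_labels[start : start + BATCH_SIZE])
            inputs = processor(text=batch, images=image,
                               return_tensors="pt", padding=True)

            # Move tensors to the model's device, casting floats to its dtype.
            prepared = {}
            for key, value in inputs.items():
                if torch.is_tensor(value) and torch.is_floating_point(value):
                    prepared[key] = value.to(device=device, dtype=model_dtype)
                elif torch.is_tensor(value):
                    prepared[key] = value.to(device)
                else:
                    prepared[key] = value

            logits.append(model(**prepared).logits_per_image[0].float().cpu())

    if not logits:
        return []

    # Softmax over the concatenated logits so scores sum to 1 across all labels.
    return torch.softmax(torch.cat(logits), dim=0).tolist()
```

Note that the bullets describe the cache manager as returning the top five predictions, while the new `app.py` performs the top-5 selection itself; the sketch returns the full score list so that it composes with `app.py` as shown further down.
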
## Deploy to Hugging Face Spaces
1. Create a new Space (Gradio template) named `medsiglip-smart-filter`.
2. Push the project files to the Space repository (via `git` or the web UI).
+ 3. In **Settings -> Repository Secrets**, add `HF_TOKEN` with your Hugging Face access token so the model and auxiliary router weights can be downloaded during build.
+ 4. The default `python app.py` launch serves the Gradio interface at `https://<space-name>.hf.space`.

+ ## Model Reference Update
+ - Removed: `poloclub/medmnist-v2` (model no longer available on Hugging Face).
+ - Added: `Matthijs/mobilevit-small`, a ~20 MB transformer that fits comfortably under 100 MB VRAM.
+ - Purpose: Acts as a lightweight fallback that assists the filename and color heuristics without impacting CPU throughput.
+ - Invocation: Only runs when the router cannot confidently decide based on metadata and statistics alone (see the sketch below).

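A possible shape for the lazily loaded fallback, assuming the stock `transformers` auto classes; how the ImageNet logits are mapped onto modality keys is not specified in this commit, so the function below stops at returning raw logits.

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

FALLBACK_ID = "Matthijs/mobilevit-small"

_fallback_processor = None
_fallback_model = None


def fallback_logits(image_path: str) -> torch.Tensor:
    """Lazily load the ~20 MB MobileViT checkpoint and return its logits."""
    global _fallback_processor, _fallback_model
    if _fallback_model is None:  # only pay the load cost on the first inconclusive image
        _fallback_processor = AutoImageProcessor.from_pretrained(FALLBACK_ID)
        _fallback_model = AutoModelForImageClassification.from_pretrained(FALLBACK_ID).eval()

    image = Image.open(image_path).convert("RGB")
    inputs = _fallback_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        return _fallback_model(**inputs).logits[0]  # ImageNet-class logits
```
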
## Notes
+ - The label libraries are stored as UTF-8 JSON arrays for straightforward editing and community contributions.
+ - When adding new modalities, drop a new `<modality>_labels.json` file into `labels/` and extend the router alias logic in `app.py` if the modality name and file name differ (see the example below).
+ - `scikit-image` and `timm` are included in `requirements.txt` for future expansion (image preprocessing, alternative backbones) while keeping the current runtime CPU-friendly.
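
As an illustration of that workflow, the snippet below writes a hypothetical `dental_labels.json` bank; the modality name and all phrases are invented placeholders, not part of the project.

```python
import json
from pathlib import Path

# Hypothetical new modality; the phrases below are illustrative placeholders.
dental_labels = [
    "periapical radiograph showing dental caries",
    "panoramic radiograph with an impacted third molar",
    "bitewing radiograph with normal alveolar bone levels",
]

out_path = Path("labels") / "dental_labels.json"
with open(out_path, "w", encoding="utf-8") as handle:
    json.dump(dental_labels, handle, ensure_ascii=False, indent=2)
```
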
app.py
CHANGED

@@ -2,22 +2,25 @@ import json
import os
from functools import lru_cache
from pathlib import Path
- from typing import Dict, List
+ from typing import Dict, List, Tuple

import torch
- from PIL import Image
import gradio as gr
from transformers import AutoModelForZeroShotImageClassification, AutoProcessor

+ from utils.cache_manager import cached_inference
+ from utils.modality_router import detect_modality
+

BASE_DIR = Path(__file__).resolve().parent
LABEL_DIR = BASE_DIR / "labels"
- BATCH_SIZE = 50
MODEL_ID = "google/medsiglip-448"


HF_TOKEN = os.getenv("HF_TOKEN")

+ torch.set_num_threads(1)
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

@@ -30,14 +33,10 @@ model = AutoModelForZeroShotImageClassification.from_pretrained(
model.eval()


- …
-     (("histopathology", "microscopic", "slide"), "pathology_labels.json"),
-     (("skin", "dermatology"), "skin_labels.json"),
-     (("cardio", "echo"), "cardio_labels.json"),
- ]
+ LABEL_OVERRIDES = {
+     "xray": "chest_labels.json",
+     "mri": "brain_labels.json",
+ }


@lru_cache(maxsize=None)

@@ -47,56 +46,33 @@ def load_labels(file_name: str) -> List[str]:
        return json.load(handle)


- def …
-         if any(keyword in name or keyword in parents for keyword in keywords):
-             return load_labels(file_name)
-     return load_labels("general_labels.json")
+ def get_candidate_labels(image_path: str) -> Tuple[str, ...]:
+     modality = detect_modality(image_path)
+     candidate_path = LABEL_DIR / f"{modality}_labels.json"
+     if not candidate_path.exists():
+         override = LABEL_OVERRIDES.get(modality)
+         if override:
+             candidate_path = LABEL_DIR / override
+     if not candidate_path.exists():
+         candidate_path = LABEL_DIR / "general_labels.json"
+
+     return tuple(load_labels(candidate_path.name))


def classify_medical_image(image_path: str) -> Dict[str, float]:
    if not image_path:
        return {}

- …
-     with torch.no_grad():
-         for start in range(0, len(labels), BATCH_SIZE):
-             batch = labels[start : start + BATCH_SIZE]
-             inputs = processor(
-                 text=batch,
-                 images=image,
-                 return_tensors="pt",
-                 padding=True,
-             )
-
-             prepared_inputs = {}
-             for key, value in inputs.items():
-                 if torch.is_tensor(value):
-                     if torch.is_floating_point(value):
-                         prepared_inputs[key] = value.to(device=device, dtype=model_dtype)
-                     else:
-                         prepared_inputs[key] = value.to(device)
-                 else:
-                     prepared_inputs[key] = value
-
-             outputs = model(**prepared_inputs)
-             batch_logits = outputs.logits_per_image[0].detach().cpu().tolist()
-             logits.extend(batch_logits)
-
-     if not logits:
+     candidate_labels = get_candidate_labels(image_path)
+     scores = cached_inference(image_path, candidate_labels, model, processor)
+
+     if not scores:
        return {}

- …
-     return {
+     results = sorted(zip(candidate_labels, scores), key=lambda x: x[1], reverse=True)
+     top_results = results[:5]

+     return {label: float(score) for label, score in top_results}


demo = gr.Interface(
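The final hunk cuts off at `demo = gr.Interface(`. For orientation, here is a hedged sketch of how the interface is presumably wired up at the end of `app.py`; every argument value below is an assumption, not part of the commit.

```python
# Continuation of app.py (hypothetical): classify_medical_image takes a file
# path, so the image input uses type="filepath"; the label output shows the
# five scores returned by the classifier.
demo = gr.Interface(
    fn=classify_medical_image,
    inputs=gr.Image(type="filepath", label="Medical image"),
    outputs=gr.Label(num_top_classes=5, label="Top predictions"),
    title="MedSigLIP Smart Medical Classifier",
)

if __name__ == "__main__":
    demo.launch()
```
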
requirements.txt
CHANGED

@@ -5,4 +5,6 @@ huggingface_hub>=0.24.0
sentencepiece
Pillow
numpy
+ scikit-image
+ timm
tensorflow