<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>Local AI (JS only)</title>
<style>
body { font-family: system-ui, sans-serif; max-width: 820px; margin: 24px auto; padding: 0 12px; }
textarea { width: 100%; min-height: 140px; }
button, select { padding: 10px 14px; margin: 6px 6px 0 0; }
.row { margin: 16px 0; }
#log { white-space: pre-wrap; font: 13px/1.4 monospace; background:#f6f6f6; padding:12px; border-radius:8px; }
#out { min-height: 48px; }
</style>
<h2>Local AI on your device (JS only)</h2>
<div class="row">
<label>Task:
<select id="task">
<option value="sentiment">Sentiment (DistilBERT)</option>
<option value="summarize">Summarize (T5-small)</option>
<option value="whisper">Transcribe (Whisper tiny.en)</option>
</select>
</label>
<button id="initBtn">Load model</button>
</div>
<div class="row" id="textRow">
<textarea id="text" placeholder="Type or paste text…"></textarea>
<button id="runBtn" disabled>Run</button>
</div>
<div class="row" id="audioRow" style="display:none">
<button id="recBtn" disabled>🎙️ Start / Stop Recording</button>
<button id="transcribeBtn" disabled>Transcribe</button>
</div>
<h3>Output</h3>
<div id="out"></div>
<h3>Log</h3>
<div id="log"></div>
<script type="module">
import { pipeline, read_audio } from "https://cdn.jsdelivr.net/npm/@xenova/transformers";
// Tiny DOM helpers: element lookup, append-to-log, and replace-output.
const $ = (id) => document.getElementById(id);
const log = (s) => ($('log').textContent += `${s}\n`);
const out = (html) => ($('out').innerHTML = html);

// Active pipeline instance; null until a model has finished loading.
let runner = null;

// --- Recording state ---
let audioCtx = null;
let processor = null;
let inputNode = null;
let stream = null;
let pcmChunks = []; // captured Float32 PCM chunks
let wavBlob = null; // WAV blob assembled when recording stops
// Enable/disable the Run button for the text-based tasks.
function enableTextUI(en) {
  $('runBtn').disabled = !en;
}
// Enable/disable recording; Transcribe stays off until a take exists.
function enableAudioUI(en) {
  $('recBtn').disabled = !en;
  $('transcribeBtn').disabled = true;
}
// Show the input row matching the selected task and reset transient state.
function toggleTaskUI() {
  const whisperSelected = $('task').value === 'whisper';
  // Exactly one input row is visible at a time.
  $('textRow').style.display = whisperSelected ? 'none' : '';
  $('audioRow').style.display = whisperSelected ? '' : 'none';
  // Switching tasks invalidates prior output and disables both runners
  // until a model is (re)loaded.
  $('log').textContent = '';
  out('');
  enableTextUI(false);
  enableAudioUI(false);
}
// Re-sync the UI whenever the task selection changes, and once at startup.
const taskSelect = $('task');
taskSelect.addEventListener('change', toggleTaskUI);
toggleTaskUI();
// --- Robust progress (handles 0–1 and 0–100 scales) ---
let lastPct = -1;
let lastTime = 0;
/**
 * Log model-download progress from @xenova/transformers callbacks.
 * Accepts either {progress} (values <= 1 are treated as a fraction,
 * otherwise as a percentage) or {loaded, total} byte counts; other
 * payloads are ignored. Output is throttled to roughly one line per
 * 120 ms, but 100% is always logged so the final update is never
 * swallowed by the throttle (the previous version could drop it).
 * @param {{progress?: number, loaded?: number, total?: number}} p
 */
function progressLogger(p) {
  let pct = null;
  if (p && typeof p.progress === 'number') pct = (p.progress <= 1 ? p.progress * 100 : p.progress);
  else if (p && p.loaded && p.total) pct = (p.loaded / p.total) * 100;
  if (pct == null) return;
  pct = Math.max(0, Math.min(100, Math.round(pct)));
  const now = performance.now();
  // Always report completion; otherwise rate-limit repeated updates.
  if (pct !== lastPct && (pct === 100 || now - lastTime > 120)) {
    log(`Download: ${pct}%`);
    lastPct = pct;
    lastTime = now;
  }
}
// --- Load model ---
// Downloads (or reads from cache) the pipeline for the selected task,
// then enables the matching UI row. Errors are logged, never thrown.
$('initBtn').onclick = async () => {
  try {
    // Wipe previous output/state before (re)loading.
    $('log').textContent = '';
    out('');
    runner = null;
    enableTextUI(false);
    enableAudioUI(false);
    lastPct = -1;
    lastTime = 0;

    const task = $('task').value;
    log('Loading… (first time may download model to cache)');

    const common = { progress_callback: progressLogger };
    if (task === 'whisper') {
      runner = await pipeline(
        "automatic-speech-recognition",
        "Xenova/whisper-tiny.en",
        { ...common, chunk_length_s: 15, stride_length_s: 2 });
      log('Model ready ✅');
      enableAudioUI(true);
      return;
    }
    const [kind, model] = task === 'sentiment'
      ? ["text-classification", "Xenova/distilbert-base-uncased-finetuned-sst-2-english"]
      : ["summarization", "Xenova/t5-small"];
    runner = await pipeline(kind, model, common);
    log('Model ready ✅');
    enableTextUI(true);
  } catch (e) {
    log('Error loading model: ' + (e?.message ?? e));
  }
};
// --- Run text tasks ---
// Sentiment: one call, pretty-printed JSON. Summarize: T5 needs the
// "summarize: " prefix; long input is split into fixed-size character
// windows that are summarized sequentially and joined.
$('runBtn').onclick = async () => {
  if (!runner) return log('Load a model first.');
  const task = $('task').value;
  const txt = $('text').value.trim();
  if (!txt) return out('<i>Enter some text.</i>');
  out('Running…');
  try {
    if (task === 'sentiment') {
      const res = await runner(txt);
      out(`<pre>${JSON.stringify(res, null, 2)}</pre>`);
      return;
    }
    const MAX = 2000; // characters per summarization window
    const parts = [];
    for (let i = 0; i < txt.length; i += MAX) {
      const piece = txt.slice(i, i + MAX);
      const r = await runner(`summarize: ${piece}`, { max_new_tokens: 120 });
      parts.push(Array.isArray(r) ? r[0]?.summary_text : r?.summary_text);
    }
    out(`<div><b>Summary:</b><br>${parts.join(' ')}</div>`);
  } catch (e) {
    out('');
    log('Run error: ' + (e?.message ?? e));
  }
};
// --- Record PCM, then encode WAV (so read_audio can decode) ---
// One button toggles: if a ScriptProcessor exists we are recording → stop,
// encode, and tear down; otherwise request the mic and start capturing.
$('recBtn').onclick = async () => {
  try {
    if (processor) {
      // Stop recording: detach the callback first so no chunks arrive
      // after disconnect, then release the mic and the audio graph.
      processor.onaudioprocess = null;
      processor.disconnect(); inputNode.disconnect();
      if (stream) stream.getTracks().forEach(t => t.stop());
      const rate = audioCtx.sampleRate;
      wavBlob = encodeWAV(pcmChunks, rate); // audio/wav
      // reset
      pcmChunks = [];
      if (audioCtx) { try { await audioCtx.close(); } catch(_){} }
      audioCtx = null; processor = null; inputNode = null; stream = null;
      $('recBtn').textContent = '🎙️ Start / Stop Recording';
      $('transcribeBtn').disabled = false;
      out('Recording stopped. Tap Transcribe.');
      return;
    }
    // Start: drop any chunks left over from a previous aborted session
    // (the old code only cleared them on a clean stop).
    pcmChunks = [];
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    // Safari/iOS may create the context suspended (autoplay policy);
    // without resume() we would record pure silence.
    if (audioCtx.state === 'suspended') await audioCtx.resume();
    inputNode = audioCtx.createMediaStreamSource(stream);
    processor = audioCtx.createScriptProcessor(4096, 1, 1);
    processor.onaudioprocess = e => {
      const ch = e.inputBuffer.getChannelData(0);
      pcmChunks.push(new Float32Array(ch)); // copy — the buffer is reused
    };
    inputNode.connect(processor);
    // ScriptProcessor needs a sink connection for onaudioprocess to fire.
    processor.connect(audioCtx.destination);
    $('recBtn').textContent = '⏹️ Stop';
    $('transcribeBtn').disabled = true;
    out('Recording… speak now.');
  } catch (e) {
    log('Mic error (use http://localhost & allow mic): ' + e.name + ' - ' + e.message);
  }
};
/**
 * Encode mono Float32 PCM chunks as a 16-bit little-endian WAV blob.
 * Samples are clamped to [-1, 1] before conversion to Int16.
 * @param {Float32Array[]} chunks - PCM sample buffers in capture order
 * @param {number} sampleRate - capture rate in Hz
 * @returns {Blob} an audio/wav blob with a standard 44-byte RIFF header
 */
function encodeWAV(chunks, sampleRate) {
  let sampleCount = 0;
  for (const c of chunks) sampleCount += c.length;
  const dataBytes = sampleCount * 2; // 16-bit mono
  const view = new DataView(new ArrayBuffer(44 + dataBytes));
  const putTag = (offset, tag) => {
    [...tag].forEach((ch, i) => view.setUint8(offset + i, ch.charCodeAt(0)));
  };
  // RIFF container header
  putTag(0, 'RIFF');
  view.setUint32(4, 36 + dataBytes, true);
  putTag(8, 'WAVE');
  // "fmt " sub-chunk: PCM (1), 1 channel, 16 bits/sample
  putTag(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, 1, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * 2, true); // byte rate = rate * blockAlign
  view.setUint16(32, 2, true);              // block align (1 ch * 2 bytes)
  view.setUint16(34, 16, true);
  // "data" sub-chunk
  putTag(36, 'data');
  view.setUint32(40, dataBytes, true);
  let pos = 44;
  for (const chunk of chunks) {
    for (const sample of chunk) {
      const clamped = Math.min(1, Math.max(-1, sample));
      view.setInt16(pos, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
      pos += 2;
    }
  }
  return new Blob([view.buffer], { type: 'audio/wav' });
}
// --- Transcribe (read_audio(URL, 16000) → Float32Array → pipeline) ---
// Decodes the recorded WAV at 16 kHz and runs it through Whisper.
// The blob URL is now revoked in `finally` — previously it leaked
// whenever read_audio or the pipeline threw.
$('transcribeBtn').onclick = async () => {
  if (!runner) return log('Load Whisper tiny.en first.');
  if (!wavBlob) return log('No audio recorded.');
  out('Transcribing…');
  const url = URL.createObjectURL(wavBlob);
  try {
    const audio = await read_audio(url, 16000); // returns Float32Array at 16kHz
    const result = await runner(audio);
    out(`<div><b>Transcript:</b><br>${result.text}</div>`);
  } catch (e) {
    out(''); log('ASR error: ' + (e?.message ?? e));
  } finally {
    URL.revokeObjectURL(url); // release on success AND failure
    // The take is single-use: require a fresh recording before the next run.
    wavBlob = null; $('transcribeBtn').disabled = true;
  }
};
// Environment diagnostics: why the mic might be blocked, whether WebGPU
// exists, and a coarse RAM bucket when the browser exposes one.
log('Secure origin required for mic: use http://localhost');
log(`WebGPU available: ${!!navigator.gpu}`);
if ('deviceMemory' in navigator) {
  log(`deviceMemory (GB bucket): ${navigator.deviceMemory}`);
}
</script>
</html>