bitsnaps committed
Commit 242d22e · verified · 1 Parent(s): 10456f4

Update app.py

Files changed (1)
  1. app.py +77 -10
app.py CHANGED
@@ -1,13 +1,15 @@
-# file: app.py
 import os
 import io
 import json
 import uuid
 import base64
 import time
+import random
+import math
 from typing import List, Dict, Tuple, Optional

 import gradio as gr
+import spaces # Required for ZeroGPU Spaces (@spaces.GPU)

 # We use the official Ollama Python client for convenience
 # It respects the OLLAMA_HOST env var, but we will also allow overriding via UI.
@@ -26,6 +28,7 @@ APP_DESCRIPTION = """
 A lightweight, fully functional chat UI for Ollama, designed to run on Hugging Face Spaces (Docker).
 - Bring your own Ollama host (set OLLAMA_HOST in repo secrets or via the UI).
 - Streamed responses, model management (list/pull), and basic vision support (image input).
+- Compatible with Spaces ZeroGPU via a @spaces.GPU-decorated function (see GPU Tools panel).
 """


@@ -229,18 +232,14 @@ def stream_chat(
             stream=True,
             options=options,
         ):
-            # The streaming responses from ollama look like:
-            # {'model': '...', 'created_at': '...', 'message': {'role': 'assistant','content':'...'}, 'done': False}
             msg = part.get("message", {}) or {}
             delta = msg.get("content", "")
             if delta:
                 assistant_text_accum += delta
                 chatbot_history[-1] = messages_for_chatbot(assistant_text_accum, None, role="assistant")

-            # Update status with token counts if present
             done = part.get("done", False)
             if done:
-                # End-of-stream stats
                 eval_count = part.get("eval_count", 0)
                 prompt_eval_count = part.get("prompt_eval_count", 0)
                 total = time.time() - start_time
@@ -253,16 +252,25 @@
             yield chatbot_history, status_md, convo_messages

         # 5) Save to conversation state: add the final user+assistant to convo_messages
-        # We add only the messages belonging to the conversation (no 'system' here)
         convo_messages = convo_messages + [
-            {"role": "user", "content": user_text or "", **({"images": [encode_image_to_base64(p) for p in (image_files or []) if encode_image_to_base64(p)]} if image_files else {})},
+            {
+                "role": "user",
+                "content": user_text or "",
+                **(
+                    {
+                        "images": [
+                            b for p in (image_files or [])
+                            for b in ([encode_image_to_base64(p)] if encode_image_to_base64(p) else [])
+                        ]
+                    } if image_files else {}
+                ),
+            },
             {"role": "assistant", "content": assistant_text_accum},
         ]

         yield chatbot_history, status_md, convo_messages

     except Exception as e:
-        # Show error inline
         err_msg = f"Error during generation: {e}"
         chatbot_history[-1] = messages_for_chatbot(err_msg, None, role="assistant")
         yield chatbot_history, err_msg, convo_messages
@@ -273,14 +281,13 @@ def clear_conversation():


 def export_conversation(history: List[Dict], convo_messages: List[Dict]) -> Tuple[str, str]:
-    # Export both the chat UI messages and the raw ollama messages
     export_blob = {
         "chat_messages": history,
         "ollama_messages": convo_messages,
         "meta": {
             "title": APP_TITLE,
             "exported_at": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()),
-            "version": "1.0",
+            "version": "1.1",
         },
     }
     path = f"chat_export_{int(time.time())}.json"
@@ -289,6 +296,46 @@ def export_conversation(history: List[Dict], convo_messages: List[Dict]) -> Tupl
     return path, f"Exported {len(history)} messages to {path}"


+# ---------------------- ZeroGPU support: define a GPU-decorated function ----------------------
+@spaces.GPU
+def gpu_ping(workload: int = 256) -> dict:
+    """
+    Minimal function to satisfy ZeroGPU Spaces requirement and optionally exercise the GPU.
+    If torch with CUDA is available, perform a tiny matmul on GPU; otherwise do a CPU loop.
+    """
+    t0 = time.time()
+    # Light CPU math as fallback
+    acc = 0.0
+    for i in range(max(1, workload)):
+        x = random.random() * 1000.0
+        # harmless math; avoids dependency on numpy
+        s = math.sin(x)
+        c = math.cos(x)
+        t = math.tan(x) if abs(math.cos(x)) > 1e-9 else 1.0
+        acc += s * c / t
+
+    info = {"mode": "cpu", "ops": workload}
+    # Optional CUDA check (torch not required)
+    try:
+        import torch # noqa: F401
+        if torch.cuda.is_available():
+            a = torch.randn((256, 256), device="cuda")
+            b = torch.mm(a, a)
+            _ = float(b.mean().item())
+            info["mode"] = "cuda"
+            info["device"] = torch.cuda.get_device_name(torch.cuda.current_device())
+            info["cuda"] = True
+        else:
+            info["cuda"] = False
+    except Exception:
+        # torch not installed or other issue; still fine for ZeroGPU detection
+        info["cuda"] = "unavailable"
+
+    elapsed = time.time() - t0
+    return {"ok": True, "elapsed_s": round(elapsed, 4), "acc_checksum": float(acc % 1.0), "info": info}
+# ---------------------------------------------------------------------------------------------
+
+
 def ui() -> gr.Blocks:
     with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {APP_TITLE}")
@@ -370,6 +417,13 @@ def ui() -> gr.Blocks:
         max_tokens = gr.Slider(0, 8192, value=0, step=16, label="Max New Tokens (0 = auto)")
         seed = gr.Number(value=None, label="Seed (optional)", precision=0)

+        gr.Markdown("## GPU Tools (ZeroGPU compatible)")
+        with gr.Row():
+            gpu_workload = gr.Slider(64, 4096, value=256, step=64, label="GPU Ping Workload")
+        with gr.Row():
+            gpu_btn = gr.Button("Run GPU Ping")
+            gpu_out = gr.Textbox(label="GPU Ping Result", lines=6, interactive=False)
+
         # Wire up actions
         def _on_load():
             # Initialize models list based on default host
@@ -486,6 +540,19 @@ def ui() -> gr.Blocks:

         chatbot.change(_sync_chatbot_state, inputs=chatbot, outputs=state_history)

+        # GPU Ping hook
+        def _gpu_ping_ui(n):
+            try:
+                res = gpu_ping(int(n))
+                try:
+                    return json.dumps(res, indent=2)
+                except Exception:
+                    return str(res)
+            except Exception as e:
+                return f"GPU ping failed: {e}"
+
+        gpu_btn.click(_gpu_ping_ui, inputs=[gpu_workload], outputs=[gpu_out])
+
     return demo


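
For context on the main addition: on ZeroGPU Spaces, the spaces.GPU decorator allocates a GPU only for the duration of the decorated call, which is why the commit exposes gpu_ping behind a button via _gpu_ping_ui rather than calling it at import time. Below is a minimal, self-contained sketch of that wiring pattern; the function and variable names (cuda_check, out, demo) are illustrative and not taken from app.py.

# Minimal ZeroGPU wiring sketch (illustrative; not part of this commit).
import gradio as gr
import spaces


@spaces.GPU  # ZeroGPU attaches a GPU only while this function runs
def cuda_check() -> str:
    """Report whether a CUDA device is visible inside the GPU-decorated call."""
    try:
        import torch
        if torch.cuda.is_available():
            return f"CUDA available: {torch.cuda.get_device_name(0)}"
        return "No CUDA device visible"
    except Exception as exc:  # torch may not be installed
        return f"torch unavailable: {exc}"


with gr.Blocks() as demo:
    out = gr.Textbox(label="Result")
    gr.Button("Check GPU").click(cuda_check, outputs=out)

if __name__ == "__main__":
    demo.launch()

The committed gpu_ping/_gpu_ping_ui pair follows the same shape, with the returned dict serialized via json.dumps into the "GPU Ping Result" textbox.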