Spaces:

bitsnaps
/

Ollama-Playground

Paused

App Files Community

bitsnaps committed on Sep 22

Commit

242d22e

verified ·

1 Parent(s): 10456f4

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -10

app.py CHANGED Viewed

@@ -1,13 +1,15 @@
-# file: app.py
 import os
 import io
 import json
 import uuid
 import base64
 import time
 from typing import List, Dict, Tuple, Optional
 import gradio as gr
 # We use the official Ollama Python client for convenience
 # It respects the OLLAMA_HOST env var, but we will also allow overriding via UI.
@@ -26,6 +28,7 @@ APP_DESCRIPTION = """
 A lightweight, fully functional chat UI for Ollama, designed to run on Hugging Face Spaces (Docker).
 - Bring your own Ollama host (set OLLAMA_HOST in repo secrets or via the UI).
 - Streamed responses, model management (list/pull), and basic vision support (image input).
 """
@@ -229,18 +232,14 @@ def stream_chat(
             stream=True,
             options=options,
         ):
-            # The streaming responses from ollama look like:
-            # {'model': '...', 'created_at': '...', 'message': {'role': 'assistant','content':'...'}, 'done': False}
             msg = part.get("message", {}) or {}
             delta = msg.get("content", "")
             if delta:
                 assistant_text_accum += delta
                 chatbot_history[-1] = messages_for_chatbot(assistant_text_accum, None, role="assistant")
-            # Update status with token counts if present
             done = part.get("done", False)
             if done:
-                # End-of-stream stats
                 eval_count = part.get("eval_count", 0)
                 prompt_eval_count = part.get("prompt_eval_count", 0)
                 total = time.time() - start_time
@@ -253,16 +252,25 @@ def stream_chat(
             yield chatbot_history, status_md, convo_messages
         # 5) Save to conversation state: add the final user+assistant to convo_messages
-        # We add only the messages belonging to the conversation (no 'system' here)
         convo_messages = convo_messages + [
-            {"role": "user", "content": user_text or "", **({"images": [encode_image_to_base64(p) for p in (image_files or []) if encode_image_to_base64(p)]} if image_files else {})},
             {"role": "assistant", "content": assistant_text_accum},
         ]
         yield chatbot_history, status_md, convo_messages
     except Exception as e:
-        # Show error inline
         err_msg = f"Error during generation: {e}"
         chatbot_history[-1] = messages_for_chatbot(err_msg, None, role="assistant")
         yield chatbot_history, err_msg, convo_messages
@@ -273,14 +281,13 @@ def clear_conversation():
 def export_conversation(history: List[Dict], convo_messages: List[Dict]) -> Tuple[str, str]:
-    # Export both the chat UI messages and the raw ollama messages
     export_blob = {
         "chat_messages": history,
         "ollama_messages": convo_messages,
         "meta": {
             "title": APP_TITLE,
             "exported_at": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()),
-            "version": "1.0",
         },
     }
     path = f"chat_export_{int(time.time())}.json"
@@ -289,6 +296,46 @@ def export_conversation(history: List[Dict], convo_messages: List[Dict]) -> Tupl
     return path, f"Exported {len(history)} messages to {path}"
 def ui() -> gr.Blocks:
     with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {APP_TITLE}")
@@ -370,6 +417,13 @@ def ui() -> gr.Blocks:
                     max_tokens = gr.Slider(0, 8192, value=0, step=16, label="Max New Tokens (0 = auto)")
                 seed = gr.Number(value=None, label="Seed (optional)", precision=0)
         # Wire up actions
         def _on_load():
             # Initialize models list based on default host
@@ -486,6 +540,19 @@ def ui() -> gr.Blocks:
         chatbot.change(_sync_chatbot_state, inputs=chatbot, outputs=state_history)
     return demo

 import os
 import io
 import json
 import uuid
 import base64
 import time
+import random
+import math
 from typing import List, Dict, Tuple, Optional
 import gradio as gr
+import spaces  # Required for ZeroGPU Spaces (@spaces.GPU)
 # We use the official Ollama Python client for convenience
 # It respects the OLLAMA_HOST env var, but we will also allow overriding via UI.
 A lightweight, fully functional chat UI for Ollama, designed to run on Hugging Face Spaces (Docker).
 - Bring your own Ollama host (set OLLAMA_HOST in repo secrets or via the UI).
 - Streamed responses, model management (list/pull), and basic vision support (image input).
+- Compatible with Spaces ZeroGPU via a @spaces.GPU-decorated function (see GPU Tools panel).
 """
             stream=True,
             options=options,
         ):
             msg = part.get("message", {}) or {}
             delta = msg.get("content", "")
             if delta:
                 assistant_text_accum += delta
                 chatbot_history[-1] = messages_for_chatbot(assistant_text_accum, None, role="assistant")
             done = part.get("done", False)
             if done:
                 eval_count = part.get("eval_count", 0)
                 prompt_eval_count = part.get("prompt_eval_count", 0)
                 total = time.time() - start_time
             yield chatbot_history, status_md, convo_messages
         # 5) Save to conversation state: add the final user+assistant to convo_messages
         convo_messages = convo_messages + [
+            {
+                "role": "user",
+                "content": user_text or "",
+                **(
+                    {
+                        "images": [
+                            b for p in (image_files or [])
+                            for b in ([encode_image_to_base64(p)] if encode_image_to_base64(p) else [])
+                        ]
+                    } if image_files else {}
+                ),
+            },
             {"role": "assistant", "content": assistant_text_accum},
         ]
         yield chatbot_history, status_md, convo_messages
     except Exception as e:
         err_msg = f"Error during generation: {e}"
         chatbot_history[-1] = messages_for_chatbot(err_msg, None, role="assistant")
         yield chatbot_history, err_msg, convo_messages
 def export_conversation(history: List[Dict], convo_messages: List[Dict]) -> Tuple[str, str]:
     export_blob = {
         "chat_messages": history,
         "ollama_messages": convo_messages,
         "meta": {
             "title": APP_TITLE,
             "exported_at": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()),
+            "version": "1.1",
         },
     }
     path = f"chat_export_{int(time.time())}.json"
     return path, f"Exported {len(history)} messages to {path}"
+# ---------------------- ZeroGPU support: define a GPU-decorated function ----------------------
+@spaces.GPU
+def gpu_ping(workload: int = 256) -> dict:
+    """
+    Minimal function to satisfy the ZeroGPU Spaces requirement and optionally exercise the GPU.
+
+    A light CPU math loop always runs first. Afterwards, if torch can be imported and
+    reports CUDA as available, a tiny matmul is additionally performed on the GPU.
+
+    Args:
+        workload: Number of CPU loop iterations; values below 1 are clamped to 1.
+
+    Returns:
+        A dict with the keys "ok", "elapsed_s", "acc_checksum" and "info".
+        "info" holds "mode" ("cpu" or "cuda"), "ops" (iterations actually run),
+        "cuda" (True, False, or "unavailable") and, on the CUDA path, "device".
+    """
+    # perf_counter is monotonic, so the elapsed time cannot be skewed by clock adjustments
+    t0 = time.perf_counter()
+    iterations = max(1, workload)
+    # Light CPU math as fallback
+    acc = 0.0
+    for _ in range(iterations):
+        x = random.random() * 1000.0
+        # harmless math; avoids dependency on numpy
+        s = math.sin(x)
+        c = math.cos(x)
+        t = math.tan(x) if abs(c) > 1e-9 else 1.0
+        if t == 0.0:
+            # random.random() may return exactly 0.0, which makes tan(x) zero;
+            # substitute 1.0 so the division below cannot raise ZeroDivisionError
+            t = 1.0
+        acc += s * c / t
+    # Report the iterations that really ran, not the unclamped request
+    info = {"mode": "cpu", "ops": iterations}
+    # Optional CUDA check (torch not required)
+    try:
+        import torch
+        if torch.cuda.is_available():
+            a = torch.randn((256, 256), device="cuda")
+            b = torch.mm(a, a)
+            _ = float(b.mean().item())
+            device = torch.cuda.get_device_name(torch.cuda.current_device())
+            # Record the CUDA result only after every GPU call has succeeded, so a
+            # late failure cannot leave mode="cuda" next to cuda="unavailable"
+            info.update(mode="cuda", device=device, cuda=True)
+        else:
+            info["cuda"] = False
+    except Exception:
+        # torch not installed or other issue; still fine for ZeroGPU detection
+        info["cuda"] = "unavailable"
+    elapsed = time.perf_counter() - t0
+    return {"ok": True, "elapsed_s": round(elapsed, 4), "acc_checksum": float(acc % 1.0), "info": info}
+# ---------------------------------------------------------------------------------------------
 def ui() -> gr.Blocks:
     with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {APP_TITLE}")
                     max_tokens = gr.Slider(0, 8192, value=0, step=16, label="Max New Tokens (0 = auto)")
                 seed = gr.Number(value=None, label="Seed (optional)", precision=0)
+                gr.Markdown("## GPU Tools (ZeroGPU compatible)")
+                with gr.Row():
+                    gpu_workload = gr.Slider(64, 4096, value=256, step=64, label="GPU Ping Workload")
+                with gr.Row():
+                    gpu_btn = gr.Button("Run GPU Ping")
+                gpu_out = gr.Textbox(label="GPU Ping Result", lines=6, interactive=False)
         # Wire up actions
         def _on_load():
             # Initialize models list based on default host
         chatbot.change(_sync_chatbot_state, inputs=chatbot, outputs=state_history)
+        # GPU Ping hook
+        def _gpu_ping_ui(n):
+            """Run gpu_ping with the given workload and return the text for the result box."""
+            try:
+                outcome = gpu_ping(int(n))
+                # Prefer pretty-printed JSON; fall back to the plain string form
+                try:
+                    text = json.dumps(outcome, indent=2)
+                except Exception:
+                    text = str(outcome)
+            except Exception as err:
+                text = f"GPU ping failed: {err}"
+            return text
+        gpu_btn.click(_gpu_ping_ui, inputs=[gpu_workload], outputs=[gpu_out])
     return demo