CU-1

Sleeping

App Files Files Community

Matis Despujols committed on Sep 30

Commit

c76abe2

verified ·

1 Parent(s): 3fee73e

Create app.py

Browse files

Files changed (1) hide show

app.py +241 -0

app.py ADDED Viewed

	@@ -0,0 +1,241 @@

+import os
+os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
+import gradio as gr
+import torch
+import cv2
+import numpy as np
+from PIL import Image
+from typing import Tuple, List
+from rfdetr.detr import RFDETRMedium
# UI element classes; a detection's class id is used as an index into this list.
CLASSES = ['button', 'field', 'heading', 'iframe', 'image', 'label', 'link', 'text']

# Colour palette for the element types, stored as (B, G, R) tuples because the
# boxes are drawn with OpenCV on a BGR image. The previous values were in
# (R, G, B) order, which swapped the red and blue channels on screen (e.g.
# "field" rendered orange instead of blue). The colour named in each trailing
# comment is what the tuple actually renders as.
CLASS_COLORS = {
    'button': (113, 204, 46),    # Green
    'field': (219, 152, 52),     # Blue
    'heading': (182, 89, 155),   # Purple
    'iframe': (15, 196, 241),    # Yellow
    'image': (34, 126, 230),     # Orange
    'label': (156, 188, 26),     # Turquoise
    'link': (60, 76, 231),       # Red
    'text': (166, 165, 149),     # Gray
}
# Module-level cache for the detector; filled in by load_model() on first use.
model = None


def load_model(model_path: str = "model/full_29.pth"):
    """Return the shared RF-DETR model, building it on the first call.

    The instance is cached in the module-level ``model`` variable, so
    ``model_path`` only matters for the call that actually constructs the
    model; later calls return the cached instance whatever path is passed.

    Args:
        model_path: Path of the pretrained weights handed to ``RFDETRMedium``.

    Returns:
        The cached model instance.
    """
    global model
    if model is not None:
        return model

    print("Loading RF-DETR model...")
    model = RFDETRMedium(pretrain_weights=model_path, resolution=1600)
    model.eval()
    print("Model loaded successfully!")
    return model
def draw_detections(
    image: np.ndarray,
    boxes: List[Tuple[int, int, int, int]],
    scores: List[float],
    classes: List[int],
    thickness: int = 3,
    font_scale: float = 0.6
) -> np.ndarray:
    """Return a copy of ``image`` with one labelled box per detection.

    Args:
        image: Image to annotate; it is copied and never modified in place.
        boxes: Corner coordinates ``(x1, y1, x2, y2)``; each value is
            converted with ``int()`` before drawing.
        scores: Confidence per box, shown in the label with two decimals.
        classes: Index into ``CLASSES`` for each box.
        thickness: Line thickness of the box outline.
        font_scale: Scale factor of the label font.

    Returns:
        The annotated copy of ``image``.
    """
    text_thickness = 2
    text_color = (255, 255, 255)
    fallback_color = (255, 255, 255)
    canvas = image.copy()

    for box, score, cls_id in zip(boxes, scores, classes):
        left, top, right, bottom = (int(coord) for coord in box)
        name = CLASSES[cls_id]
        # The palette entry is handed to OpenCV unchanged.
        color = CLASS_COLORS.get(name, fallback_color)
        font = cv2.FONT_HERSHEY_SIMPLEX

        # Box outline.
        cv2.rectangle(canvas, (left, top), (right, bottom), color, thickness)

        # Measure the caption so the filled background fits it.
        caption = f"{name} {score:.2f}"
        (text_w, text_h), baseline = cv2.getTextSize(
            caption, font, font_scale, thickness=text_thickness
        )

        # Place the caption above the box, but never closer to the top edge
        # than its own height plus a margin.
        anchor_y = max(top - 10, text_h + 10)
        background_top_left = (left, anchor_y - text_h - baseline - 5)
        background_bottom_right = (left + text_w + 5, anchor_y + baseline - 5)
        cv2.rectangle(canvas, background_top_left, background_bottom_right, color, -1)

        # Caption text on top of the filled background.
        text_origin = (left + 2, anchor_y - baseline - 5)
        cv2.putText(
            canvas,
            caption,
            text_origin,
            font,
            font_scale,
            text_color,
            thickness=text_thickness
        )

    return canvas
@torch.inference_mode()
def detect_ui_elements(
    image: Image.Image,
    confidence_threshold: float,
    line_thickness: int
) -> Tuple[Image.Image, str]:
    """Detect UI elements in an image and summarise what was found.

    Args:
        image: Input PIL image, or ``None`` when nothing has been uploaded.
        confidence_threshold: Minimum confidence score for detections.
        line_thickness: Thickness of the bounding-box lines.

    Returns:
        A tuple ``(annotated_image, summary)``. ``annotated_image`` is a PIL
        image with the detections drawn on it, or ``None`` when no input
        image was supplied. ``summary`` is Markdown text with the total
        number of detections and a per-class count.
    """
    if image is None:
        return None, "Please upload an image first."

    # Named "detector" so the module-level ``model`` cache is not shadowed.
    detector = load_model()

    # Force three-channel RGB: grayscale, palette or RGBA input would
    # otherwise break the RGB->BGR conversion below or reach the model with
    # the wrong number of channels.
    img_array = np.array(image.convert("RGB"))

    # Run detection; the result exposes xyxy boxes, confidences and class ids.
    detections = detector.predict(img_array, threshold=confidence_threshold)
    boxes = detections.xyxy.tolist()
    scores = detections.confidence.tolist()
    class_ids = detections.class_id.tolist()

    # Draw with OpenCV on a BGR copy, then convert back to RGB for display.
    img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    annotated_bgr = draw_detections(
        img_bgr,
        boxes,
        scores,
        class_ids,
        # OpenCV drawing functions need an integer thickness; coerce defensively.
        thickness=int(line_thickness)
    )
    annotated_pil = Image.fromarray(cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB))

    # Count detections per class name.
    class_counts = {}
    for cls_id in class_ids:
        name = CLASSES[cls_id]
        class_counts[name] = class_counts.get(name, 0) + 1

    # Markdown summary: total first, then one bullet per class, alphabetically.
    summary_lines = [
        f"**Total detections:** {len(boxes)}\n",
        "**Detected elements:**",
    ]
    summary_lines.extend(
        f"- {name}: {class_counts[name]}" for name in sorted(class_counts)
    )
    return annotated_pil, "\n".join(summary_lines)
# Gradio UI: settings and legend in the left column, results in the right one.
# Components are created in the same order as before, since creation order
# inside the context managers determines the layout.
with gr.Blocks(theme=gr.themes.Soft(), title="RF-DETR UI Element Detector") as demo:
    # Page header.
    gr.Markdown(value="""
    # 🎯 RF-DETR UI Element Detector
    Upload a screenshot or UI mockup to automatically detect interactive elements.
    This model identifies 8 types of UI components: buttons, fields, headings, iframes, images, labels, links, and text.
    """)

    with gr.Row():
        # Left column: image upload, detection settings, trigger and legend.
        with gr.Column(scale=1):
            input_image = gr.Image(label="📤 Upload Screenshot", type="pil", height=400)

            with gr.Accordion(label="⚙️ Detection Settings", open=True):
                confidence_slider = gr.Slider(
                    label="Confidence Threshold",
                    info="Higher values = fewer but more confident detections",
                    minimum=0.1,
                    maximum=0.9,
                    step=0.05,
                    value=0.35
                )
                thickness_slider = gr.Slider(
                    label="Box Line Thickness",
                    minimum=1,
                    maximum=6,
                    step=1,
                    value=3
                )

            detect_button = gr.Button(value="🔍 Detect Elements", size="lg", variant="primary")

            # Legend mapping each class to the colour it is meant to be drawn in.
            gr.Markdown(value="""
            ### 📊 Detected Classes:
            - 🟢 **button** - Interactive buttons
            - 🔵 **field** - Input fields
            - 🟣 **heading** - Headers and titles
            - 🟡 **iframe** - Embedded frames
            - 🟠 **image** - Images and icons
            - 🔷 **label** - Text labels
            - 🔴 **link** - Hyperlinks
            - ⚪ **text** - Plain text
            """)

        # Right column: annotated image and the Markdown summary.
        with gr.Column(scale=1):
            output_image = gr.Image(label="🎨 Detected Elements", type="pil", height=400)
            summary_output = gr.Markdown(label="📋 Detection Summary")

    # Example gallery (currently empty).
    gr.Markdown(value="### 💡 Try with example images:")
    gr.Examples(
        label="Example Screenshots",
        inputs=input_image,
        examples=[
            # Add example image paths here if available
        ]
    )

    # Run detection when the button is clicked.
    detect_button.click(
        detect_ui_elements,
        [input_image, confidence_slider, thickness_slider],
        [output_image, summary_output]
    )

    # Footer.
    gr.Markdown(value="""
    ---
    **Model:** RF-DETR Medium (Resolution: 1600px) | **Framework:** PyTorch
    """)
# Entry point: enable request queuing, then start the server without a public share link.
if __name__ == "__main__":
    queued_app = demo.queue()
    queued_app.launch(share=False)