Spaces:

Safe-Drive-TN
/

Tunisian-License-Plate-Detection-OCR

Running

yassine-mhirsi committed on 15 days ago

Commit

c90fc3f

1 Parent(s): 9c149e4

Add multi-car detection functionality to Gradio app

- Introduced methods to retrieve and process multi-car detection videos.
- Added a new tab in the Gradio interface for multi-car detection, allowing users to select and process videos.
- Updated configuration to include the multi-car detection model.
- Enhanced result formatting to provide detailed statistics and summaries for processed videos.

Files changed (4) hide show

app/gradio_app.py +154 -1
app/models/multi_car_detector.py +240 -0
app/services/multi_car_pipeline.py +128 -0
app/utils/config.py +1 -0

app/gradio_app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import logging
 from app.services.pipeline import get_pipeline
 from app.utils.image_processing import numpy_to_pil
 from app.models.state_farm_model import get_state_farm_detector
 import os
 import glob
@@ -207,6 +208,82 @@ def get_state_farm_files():
     return files
 def process_state_farm(file_path: str) -> Tuple:
     """
     Process State Farm distracted driver detection.
@@ -428,6 +505,81 @@ def create_interface():
                     inputs=[file_selector],
                     outputs=[output_image_state_farm, result_text_state_farm]
                 )
         # Footer
         gr.Markdown("""
@@ -435,12 +587,13 @@ def create_interface():
         ### 📚 About
-        This application uses five state-of-the-art models:
         - **Car Detection**: `Safe-Drive-TN/Car-detection-from-scratch` (Custom CNN)
         - **Plate Detection**: `Safe-Drive-TN/Tunisian-Licence-plate-Detection` (YOLOv8n)
         - **Word Detection**: `Safe-Drive-TN/tunis-word-detection-yolov8s` (YOLOv8s)
         - **OCR**: `microsoft/trocr-base-printed` (TrOCR)
         - **State Farm Detection**: `Safe-Drive-TN/State-farm-detection` (YOLOv8n-cls)
         Made with ❤️
         """)

 from app.services.pipeline import get_pipeline
 from app.utils.image_processing import numpy_to_pil
 from app.models.state_farm_model import get_state_farm_detector
+from app.services.multi_car_pipeline import get_multi_car_pipeline
 import os
 import glob
     return files
+def get_multi_car_videos():
+    """
+    List the videos available for multi-car detection.
+    Scans ``datasets/multi-car/`` (non-recursively) for regular files whose
+    extension is .mp4, .avi, .mov or .mkv. The extension is compared
+    case-insensitively, so mixed-case names such as ``clip.Mp4`` are found
+    and every file is reported exactly once, even on case-insensitive
+    filesystems.
+    Returns:
+        Sorted list of paths relative to the current working directory;
+        empty when the directory does not exist.
+    """
+    base_path = "datasets/multi-car"
+    if not os.path.isdir(base_path):
+        return []
+    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
+    files = []
+    for entry in os.listdir(base_path):
+        # Skip hidden (dot-prefixed) entries; they are not user-facing videos.
+        if entry.startswith('.'):
+            continue
+        if not entry.lower().endswith(video_extensions):
+            continue
+        full_path = os.path.join(base_path, entry)
+        # A directory that merely looks like a video name is not selectable.
+        if os.path.isfile(full_path):
+            files.append(os.path.relpath(full_path))
+    return sorted(files)
+def process_multi_car_video(video_path: str) -> Tuple:
+    """
+    Run multi-car detection on a video and format the outcome for Gradio.
+    Args:
+        video_path: Path to video file
+    Returns:
+        Tuple of (output_video_path, results text). The path is None when no
+        valid video was selected or processing failed; the text then holds
+        the prompt or the error message instead of the statistics.
+    """
+    if not video_path or not os.path.exists(video_path):
+        return None, "Please select a video from the dropdown"
+    logger.info(f"🎨 Gradio: Processing Multi-Car detection - Video: {video_path}")
+    try:
+        pipeline = get_multi_car_pipeline()
+        result = pipeline.process_video(video_path)
+        if not result.get('success'):
+            # The 'error' key may be present but None, so fall back explicitly
+            # rather than relying on the dict.get default.
+            error_msg = result.get('error') or 'Processing failed'
+            return None, f"**Error:** {error_msg}"
+        summary = pipeline.get_detection_summary(result['detections_per_frame'])
+        # Markdown report shown next to the annotated video
+        result_text = f"""
+## Video Processing Complete
+### **Output Video:** {os.path.basename(result['output_path'])}
+---
+### 📊 Detection Statistics:
+- **Total Frames Processed:** {result['total_frames']}
+- **Total Detections:** {summary['total_detections']}
+- **Average Detections per Frame:** {summary['average_detections_per_frame']:.2f}
+- **Max Detections in a Frame:** {summary['max_detections_per_frame']}
+### 🎯 Detected Classes:
+"""
+        # One bullet per detected class, built in a single pass
+        result_text += "".join(
+            f"- **{class_name}:** {count} detections\n"
+            for class_name, count in summary['class_counts'].items()
+        )
+        result_text += f"\n---\n\n### ⏱️ Processing Time: {result['processing_time']:.2f}s"
+        return result['output_path'], result_text
+    except Exception as e:
+        # UI boundary: show the failure to the user, keep the traceback in the log.
+        error_msg = f"Error processing video: {str(e)}"
+        logger.exception(error_msg)
+        return None, f"**Error:** {error_msg}"
 def process_state_farm(file_path: str) -> Tuple:
     """
     Process State Farm distracted driver detection.
                     inputs=[file_selector],
                     outputs=[output_image_state_farm, result_text_state_farm]
                 )
+            # Multi-Car Detection Tab
+            with gr.Tab("Multi-Car Detection"):
+                gr.Markdown("""
+                # 🚗 Multi-Car and Driver Detection
+                Select a video from the pre-loaded dataset to detect multiple cars and drivers.
+                **Model:** YOLO (Multiple Car Detection)
+                The model will process the video frame by frame and detect:
+                - Multiple cars
+                - Drivers
+                - Other objects as defined by the model
+                The output video will show bounding boxes and labels for all detected objects.
+                """)
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        # Video selector dropdown
+                        available_videos = get_multi_car_videos()
+                        if not available_videos:
+                            gr.Markdown("⚠️ **No videos found in datasets/multi-car/ directory**")
+                            video_selector = gr.Dropdown(
+                                choices=[],
+                                label="Select Video",
+                                interactive=False
+                            )
+                        else:
+                            video_selector = gr.Dropdown(
+                                choices=available_videos,
+                                label="Select Video",
+                                value=available_videos[0] if available_videos else None,
+                                interactive=True
+                            )
+                        process_video_button = gr.Button("🎬 Process Video", variant="primary", size="lg")
+                        result_text_multi_car = gr.Markdown()
+                    with gr.Column(scale=1):
+                        output_video_multi_car = gr.Video(label="Annotated Video Output")
+                def update_video_display(video_path):
+                    """Preview the chosen video and prompt the user to start processing."""
+                    if video_path and os.path.exists(video_path):
+                        selected_note = f"**Video selected:** {os.path.basename(video_path)}\n\nClick 'Process Video' to detect cars and drivers."
+                        return video_path, selected_note
+                    # Nothing selected, or the file is no longer on disk
+                    return None, "**Please select a video from the dropdown.**"
+                def process_video_and_display(video_path):
+                    """Run detection on the selected video; show the output only if the file exists."""
+                    if not video_path:
+                        return None, "Please select a video"
+                    annotated_path, summary_text = process_multi_car_video(video_path)
+                    # Only hand a path to the video component when the file is really there
+                    has_output = bool(annotated_path) and os.path.exists(annotated_path)
+                    return (annotated_path if has_output else None), summary_text
+                # Update display when video is selected
+                video_selector.change(
+                    fn=update_video_display,
+                    inputs=[video_selector],
+                    outputs=[output_video_multi_car, result_text_multi_car]
+                )
+                # Process when button is clicked
+                process_video_button.click(
+                    fn=process_video_and_display,
+                    inputs=[video_selector],
+                    outputs=[output_video_multi_car, result_text_multi_car]
+                )
         # Footer
         gr.Markdown("""
         ### 📚 About
+        This application uses six state-of-the-art models:
         - **Car Detection**: `Safe-Drive-TN/Car-detection-from-scratch` (Custom CNN)
         - **Plate Detection**: `Safe-Drive-TN/Tunisian-Licence-plate-Detection` (YOLOv8n)
         - **Word Detection**: `Safe-Drive-TN/tunis-word-detection-yolov8s` (YOLOv8s)
         - **OCR**: `microsoft/trocr-base-printed` (TrOCR)
         - **State Farm Detection**: `Safe-Drive-TN/State-farm-detection` (YOLOv8n-cls)
+        - **Multi-Car Detection**: `Safe-Drive-TN/Multiple-Car-Detection` (YOLO)
         Made with ❤️
         """)

app/models/multi_car_detector.py ADDED Viewed

	@@ -0,0 +1,240 @@

+"""
+Multiple Car and Driver Detection model using YOLO from HuggingFace.
+"""
+import numpy as np
+from typing import Dict, List, Optional
+from ultralytics import YOLO
+from huggingface_hub import hf_hub_download
+from app.utils.config import MULTI_CAR_DETECTION_MODEL, HF_TOKEN
+class MultiCarDetector:
+    """
+    Detects multiple cars and drivers in images/videos using YOLO.
+    Model hosted on HuggingFace: Safe-Drive-TN/Multiple-Car-Detection
+    The weights are loaded lazily: detect() and predict_video() call
+    load_model() themselves when no model is loaded yet.
+    """
+    def __init__(self, confidence_threshold: float = 0.25):
+        """
+        Create an unloaded detector.
+        Args:
+            confidence_threshold: Minimum confidence, passed to YOLO as ``conf``
+        """
+        self.model = None
+        self.confidence_threshold = confidence_threshold
+    def load_model(self):
+        """
+        Download the weights from HuggingFace and load them into YOLO.
+        Does nothing when a model is already loaded.
+        Raises:
+            Exception: Any download or load error is printed and re-raised.
+        """
+        if self.model is not None:
+            return
+        try:
+            # NOTE(review): "Muliple" looks like a typo but presumably mirrors
+            # the file name stored in the repo - confirm before changing it.
+            model_path = hf_hub_download(
+                repo_id=MULTI_CAR_DETECTION_MODEL,
+                filename="Multiple-Car-Detection/Muliple_Car_Detection.pt",
+                token=HF_TOKEN
+            )
+            self.model = YOLO(model_path)
+            print(f"Multi-car detection model loaded successfully from {MULTI_CAR_DETECTION_MODEL}")
+        except Exception as e:
+            print(f"Error loading multi-car detection model: {e}")
+            raise
+    def _boxes_to_detections(self, boxes) -> List[Dict]:
+        """Convert the boxes of one YOLO result into plain detection dicts."""
+        if boxes is None:
+            return []
+        # Fall back to synthetic "class_<id>" names when the model has no name table
+        class_names = getattr(self.model, 'names', None) or {}
+        detections = []
+        for box in boxes:
+            class_id = int(box.cls[0].cpu().numpy())
+            detections.append({
+                'bbox': box.xyxy[0].cpu().numpy().tolist(),  # [x1, y1, x2, y2]
+                'confidence': float(box.conf[0].cpu().numpy()),
+                'class_id': class_id,
+                'class_name': class_names.get(class_id, f"class_{class_id}")
+            })
+        return detections
+    @staticmethod
+    def _find_fresh_video(search_dirs, stem, not_before):
+        """
+        Locate the annotated video written during the current run.
+        Only files modified at or after ``not_before`` qualify, so output left
+        behind by an earlier run is never mistaken for the new result.
+        Returns:
+            Path of the video, or None if nothing fresh was found.
+        """
+        def is_fresh(path):
+            return path.is_file() and path.stat().st_mtime >= not_before
+        # Preferred: a file named after the input video
+        for directory in search_dirs:
+            for suffix in ('.mp4', '.avi'):
+                candidate = directory / f"{stem}{suffix}"
+                if is_fresh(candidate):
+                    return candidate
+        # Best effort: the newest fresh video in any searched directory
+        fresh = [
+            path
+            for directory in search_dirs if directory.is_dir()
+            for pattern in ('*.mp4', '*.avi')
+            for path in directory.glob(pattern)
+            if is_fresh(path)
+        ]
+        return max(fresh, key=lambda p: p.stat().st_mtime) if fresh else None
+    def detect(self, image: np.ndarray) -> List[Dict]:
+        """
+        Detect cars and drivers in an image.
+        Args:
+            image: Input image as numpy array (BGR format)
+        Returns:
+            List of dictionaries sorted by confidence (highest first), each containing:
+                - bbox: Bounding box as [x1, y1, x2, y2]
+                - confidence: Detection confidence score
+                - class_id: Class ID
+                - class_name: Class name (if available)
+            Inference errors are printed and reported as an empty list.
+        """
+        if self.model is None:
+            self.load_model()
+        try:
+            results = self.model(image, conf=self.confidence_threshold, verbose=False)
+            if len(results) == 0:
+                return []
+            detections = self._boxes_to_detections(results[0].boxes)
+            detections.sort(key=lambda d: d['confidence'], reverse=True)
+            return detections
+        except Exception as e:
+            print(f"Error during multi-car detection: {e}")
+            return []
+    def predict_video(self, video_path: str, save_path: Optional[str] = None) -> Dict:
+        """
+        Process a video and return annotated video path.
+        Frames are consumed one at a time (``stream=True``) so memory use does
+        not grow with the length of the video.
+        Args:
+            video_path: Path to input video file
+            save_path: Optional path to save annotated video (if None, it is
+                ``output/multi_car_detection/<input stem>_annotated.mp4``)
+        Returns:
+            Dictionary containing:
+                - success: True when the annotated video was produced
+                - output_path: Path to annotated video (None on failure)
+                - total_frames: Total number of frames processed
+                - detections_per_frame: List of detections per frame
+                - error: Error message (only on failure)
+        """
+        if self.model is None:
+            self.load_model()
+        # Function-local imports: only this method needs them.
+        import shutil
+        import time
+        import traceback
+        from pathlib import Path
+        try:
+            input_stem = Path(video_path).stem
+            if save_path is None:
+                save_path = str(Path("output/multi_car_detection") / f"{input_stem}_annotated.mp4")
+            output_dir = Path(save_path).parent
+            output_dir.mkdir(parents=True, exist_ok=True)
+            # Small slack because some filesystems store coarse modification times
+            not_before = time.time() - 2.0
+            # project/name steer YOLO's own output into output_dir
+            results = self.model.predict(
+                source=video_path,
+                conf=self.confidence_threshold,
+                save=True,
+                save_txt=False,
+                save_conf=True,
+                project=str(output_dir.parent),
+                name=output_dir.name,
+                exist_ok=True,
+                verbose=False,
+                stream=True
+            )
+            search_dirs = [output_dir, Path("runs/detect") / output_dir.name]
+            detections_per_frame = []
+            # The annotated video is only complete once the stream is exhausted,
+            # so the output file is looked up after this loop.
+            for result in results:
+                detections_per_frame.append(
+                    self._boxes_to_detections(getattr(result, 'boxes', None))
+                )
+                # Prefer the directory YOLO itself reports, when it exposes one
+                reported_dir = getattr(result, 'save_dir', None)
+                if reported_dir and Path(reported_dir) not in search_dirs:
+                    search_dirs.insert(0, Path(reported_dir))
+            total_frames = len(detections_per_frame)
+            produced = self._find_fresh_video(search_dirs, input_stem, not_before)
+            if produced is None:
+                return {
+                    'output_path': None,
+                    'total_frames': total_frames,
+                    'detections_per_frame': detections_per_frame,
+                    'success': False,
+                    'error': 'Could not locate output video file'
+                }
+            if produced.resolve() != Path(save_path).resolve():
+                # NOTE(review): the bytes are copied unchanged, so an .avi written
+                # by YOLO keeps its AVI container even when save_path ends in
+                # .mp4 - confirm that consumers cope with that.
+                shutil.copy2(produced, save_path)
+            return {
+                'output_path': save_path,
+                'total_frames': total_frames,
+                'detections_per_frame': detections_per_frame,
+                'success': True
+            }
+        except Exception as e:
+            print(f"Error during video processing: {e}")
+            traceback.print_exc()
+            return {
+                'output_path': None,
+                'total_frames': 0,
+                'detections_per_frame': [],
+                'success': False,
+                'error': str(e)
+            }
+# Global instance
+_multi_car_detector = None
+def get_multi_car_detector(confidence_threshold: float = 0.25) -> MultiCarDetector:
+    """
+    Get or create global multi-car detector instance.
+    The detector is created, and its model loaded, on the first call only.
+    ``confidence_threshold`` is ignored on later calls, which return the
+    existing instance unchanged.
+    Raises:
+        Exception: Whatever load_model() raises while creating the instance.
+            Nothing is cached in that case, so the next call tries again.
+    """
+    global _multi_car_detector
+    if _multi_car_detector is None:
+        detector = MultiCarDetector(confidence_threshold=confidence_threshold)
+        detector.load_model()
+        # Publish only after a successful load so a failed attempt is not cached.
+        _multi_car_detector = detector
+    return _multi_car_detector

app/services/multi_car_pipeline.py ADDED Viewed

	@@ -0,0 +1,128 @@

+"""
+Multi-car video detection pipeline service.
+"""
+import os
+import time
+import tempfile
+from typing import Dict, Optional
+from pathlib import Path
+from app.models.multi_car_detector import get_multi_car_detector
+class MultiCarVideoPipeline:
+    """
+    Pipeline for processing videos with multi-car and driver detection.
+    Delegates the per-frame work to the shared multi-car detector and wraps
+    its result with timing and error information.
+    """
+    def __init__(self, confidence_threshold: float = 0.25):
+        """Initialize the pipeline with the multi-car detector."""
+        self.detector = get_multi_car_detector(confidence_threshold=confidence_threshold)
+    def process_video(self, video_path: str, output_path: Optional[str] = None) -> Dict:
+        """
+        Process a video and return annotated video with detections.
+        Args:
+            video_path: Path to input video file
+            output_path: Optional path to save annotated video (if None, it is
+                ``output/multi_car_detection/<input stem>_annotated.mp4``)
+        Returns:
+            Dictionary containing:
+                - success: Boolean indicating if processing was successful
+                - output_path: Path to annotated video (None on failure)
+                - total_frames: Total number of frames processed
+                - detections_per_frame: List of detections per frame
+                - processing_time: Time taken, recorded for every attempt that
+                  reached the detector, successful or not
+                - error: Error message (if failed)
+        """
+        result = {
+            'success': False,
+            'output_path': None,
+            'total_frames': 0,
+            'detections_per_frame': [],
+            'processing_time': 0.0,
+            'error': None
+        }
+        if not os.path.exists(video_path):
+            result['error'] = f"Video file not found: {video_path}"
+            return result
+        start_time = time.time()
+        try:
+            if output_path is None:
+                output_dir = Path("output/multi_car_detection")
+                output_dir.mkdir(parents=True, exist_ok=True)
+                output_path = str(output_dir / f"{Path(video_path).stem}_annotated.mp4")
+            detection_result = self.detector.predict_video(video_path, save_path=output_path)
+            # Keep the frame statistics even when the annotated video is missing
+            result['total_frames'] = detection_result.get('total_frames', 0)
+            result['detections_per_frame'] = detection_result.get('detections_per_frame', [])
+            if detection_result.get('success', False):
+                result['success'] = True
+                result['output_path'] = detection_result['output_path']
+            else:
+                result['error'] = detection_result.get('error') or 'Video processing failed'
+        except Exception as e:
+            result['error'] = f"Pipeline error: {str(e)}"
+            print(f"Multi-car video pipeline error: {e}")
+        finally:
+            # Recorded on every path, including detector-reported failures
+            result['processing_time'] = time.time() - start_time
+        return result
+    def get_detection_summary(self, detections_per_frame: list) -> Dict:
+        """
+        Generate a summary of detections across all frames.
+        Args:
+            detections_per_frame: List of detections per frame (None is
+                treated as no frames)
+        Returns:
+            Dictionary with total_detections, total_frames,
+            average_detections_per_frame, max_detections_per_frame and
+            class_counts (class name -> number of detections)
+        """
+        frames = detections_per_frame or []
+        frame_sizes = [len(frame) for frame in frames]
+        total_detections = sum(frame_sizes)
+        class_counts = {}
+        for frame in frames:
+            for detection in frame:
+                class_name = detection.get('class_name', 'unknown')
+                class_counts[class_name] = class_counts.get(class_name, 0) + 1
+        return {
+            'total_detections': total_detections,
+            'total_frames': len(frame_sizes),
+            'average_detections_per_frame': total_detections / len(frame_sizes) if frame_sizes else 0,
+            'max_detections_per_frame': max(frame_sizes, default=0),
+            'class_counts': class_counts
+        }
+# Global pipeline instance
+_multi_car_pipeline = None
+def get_multi_car_pipeline(confidence_threshold: float = 0.25) -> MultiCarVideoPipeline:
+    """
+    Return the shared multi-car video pipeline, building it on first use.
+    ``confidence_threshold`` only takes effect on the call that creates the
+    pipeline; later calls return the existing instance.
+    """
+    global _multi_car_pipeline
+    if _multi_car_pipeline is not None:
+        return _multi_car_pipeline
+    _multi_car_pipeline = MultiCarVideoPipeline(confidence_threshold=confidence_threshold)
+    return _multi_car_pipeline

app/utils/config.py CHANGED Viewed

@@ -14,6 +14,7 @@ WORD_DETECTION_MODEL = "Safe-Drive-TN/tunis-word-detection-yolov8s"
 OCR_MODEL = "microsoft/trocr-base-printed"
 STATE_FARM_MODEL = "Safe-Drive-TN/State-farm-detection"
 # HuggingFace Token
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

 OCR_MODEL = "microsoft/trocr-base-printed"
 STATE_FARM_MODEL = "Safe-Drive-TN/State-farm-detection"
+# HuggingFace repo id from which the multi-car detection YOLO weights are downloaded
+MULTI_CAR_DETECTION_MODEL = "Safe-Drive-TN/Multiple-Car-Detection"
 # HuggingFace Token
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")