Spaces:
Running
Running
Josh Brown Kramer
committed on
Commit
·
4df3bee
1
Parent(s):
74a9cf9
In place zombies working
Browse files
- align.py +39 -0
- app.py +4 -8
- faceparsing.py +0 -1
- faceparsing2.py +5 -19
- requirements.txt +1 -4
- zombie.py +1 -6
align.py
CHANGED
|
@@ -2,10 +2,49 @@ import numpy as np
|
|
| 2 |
import mediapipe as mp
|
| 3 |
from PIL import Image
|
| 4 |
import PIL
|
|
|
|
|
|
|
| 5 |
|
| 6 |
mp_face_detection = mp.solutions.face_detection
|
| 7 |
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=0)
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def get_landmarks(numpy_array,locations,context,model_type="dlib"):
|
| 10 |
'''
|
| 11 |
model_type can be "dlib" or "mediapipe"
|
|
|
|
| 2 |
import mediapipe as mp
|
| 3 |
from PIL import Image
|
| 4 |
import PIL
|
| 5 |
+
import scipy
|
| 6 |
+
import scipy.ndimage
|
| 7 |
|
| 8 |
mp_face_detection = mp.solutions.face_detection
|
| 9 |
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=0)
|
| 10 |
|
| 11 |
+
def aligns(pil_image,enable_padding=True,output_size=512,model_type="dlib",max_people=7):
|
| 12 |
+
w,h = pil_image.size
|
| 13 |
+
scale = 1
|
| 14 |
+
if min(w,h) > output_size*2:
|
| 15 |
+
scale = min(w,h) / (output_size*2)
|
| 16 |
+
new_w = int(w/scale)
|
| 17 |
+
new_h = int(h/scale)
|
| 18 |
+
pil_image = pil_image.resize((new_w,new_h),PIL.Image.BILINEAR)
|
| 19 |
+
|
| 20 |
+
numpy_im = np.array(pil_image)
|
| 21 |
+
|
| 22 |
+
#Find the locations of faces
|
| 23 |
+
locations,context = get_locations(numpy_im,model_type)#face_recognition.face_locations(numpy_im)
|
| 24 |
+
n_found = len(locations)
|
| 25 |
+
print("Faces found",n_found)
|
| 26 |
+
if (n_found == 0):
|
| 27 |
+
return []
|
| 28 |
+
|
| 29 |
+
#How many are we going to return?
|
| 30 |
+
n_to_return = min(n_found,max_people)
|
| 31 |
+
|
| 32 |
+
#Return the largest ones
|
| 33 |
+
areas = [(l[2] - l[0])*(l[1] - l[3]) for l in locations]
|
| 34 |
+
indices = np.argpartition(areas, -n_to_return)[-n_to_return:]
|
| 35 |
+
|
| 36 |
+
#Find the landmarks
|
| 37 |
+
face_landmarks_list = get_landmarks(numpy_im,[locations[i] for i in indices],context,model_type)#face_recognition.face_landmarks(numpy_im,[locations[i]])
|
| 38 |
+
|
| 39 |
+
#Package them up
|
| 40 |
+
to_return = []
|
| 41 |
+
for face in face_landmarks_list:
|
| 42 |
+
im,quad = image_align(pil_image,face,enable_padding=enable_padding,output_size=output_size,transform_size=output_size)
|
| 43 |
+
to_return.append((im,quad*scale))
|
| 44 |
+
|
| 45 |
+
#Return them
|
| 46 |
+
return to_return
|
| 47 |
+
|
| 48 |
def get_landmarks(numpy_array,locations,context,model_type="dlib"):
|
| 49 |
'''
|
| 50 |
model_type can be "dlib" or "mediapipe"
|
app.py
CHANGED
|
@@ -44,14 +44,10 @@ def predict(input_image, mode):
|
|
| 44 |
return zombie_image
|
| 45 |
elif mode == "In Place":
|
| 46 |
im_array = np.array(input_image)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
#return zombie_image
|
| 52 |
-
face_mask = get_face_mask(input_image)
|
| 53 |
-
return face_mask
|
| 54 |
-
|
| 55 |
else:
|
| 56 |
return "Invalid mode selected"
|
| 57 |
|
|
|
|
| 44 |
return zombie_image
|
| 45 |
elif mode == "In Place":
|
| 46 |
im_array = np.array(input_image)
|
| 47 |
+
zombie_image = zombie.make_faces_zombie_from_array(im_array, None, ort_session)
|
| 48 |
+
if zombie_image is None:
|
| 49 |
+
return "No face found"
|
| 50 |
+
return zombie_image
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
else:
|
| 52 |
return "Invalid mode selected"
|
| 53 |
|
faceparsing.py
CHANGED
|
@@ -4,7 +4,6 @@ from transformers import SegformerImageProcessor, SegformerForSemanticSegmentati
|
|
| 4 |
import numpy as np
|
| 5 |
|
| 6 |
from PIL import Image
|
| 7 |
-
import matplotlib.pyplot as plt
|
| 8 |
|
| 9 |
# Global variables for lazy loading
|
| 10 |
image_processor = None
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
|
| 6 |
from PIL import Image
|
|
|
|
| 7 |
|
| 8 |
# Global variables for lazy loading
|
| 9 |
image_processor = None
|
faceparsing2.py
CHANGED
|
@@ -7,9 +7,9 @@ from pathlib import Path
|
|
| 7 |
import numpy as np
|
| 8 |
from tqdm import tqdm
|
| 9 |
import onnxruntime as ort
|
| 10 |
-
import matplotlib.pyplot as plt
|
| 11 |
from huggingface_hub import hf_hub_download
|
| 12 |
import cv2
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
model_path = hf_hub_download(repo_id="jbrownkramer/face-parsing", filename="resnet18.onnx")
|
|
@@ -17,24 +17,10 @@ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if ort.get_device(
|
|
| 17 |
session = ort.InferenceSession(model_path, providers=providers)
|
| 18 |
|
| 19 |
def prepare_image(image, input_size: Tuple[int, int] = (512, 512)) -> np.ndarray:
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
# # Define transformation pipeline
|
| 24 |
-
# transform = transforms.Compose([
|
| 25 |
-
# transforms.ToTensor(),
|
| 26 |
-
# transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
|
| 27 |
-
# ])
|
| 28 |
-
|
| 29 |
-
# # Apply transformations
|
| 30 |
-
# image_tensor = transform(resized_image)
|
| 31 |
-
# image_batch = image_tensor.unsqueeze(0)
|
| 32 |
-
|
| 33 |
-
image_batch = np.array(resized_image)
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
image_batch = image_batch / 255.0
|
| 39 |
image_batch -= np.array([[[0.485, 0.456, 0.406]]])
|
| 40 |
image_batch /= np.array([[[0.229, 0.224, 0.225]]])
|
|
@@ -46,7 +32,7 @@ def prepare_image(image, input_size: Tuple[int, int] = (512, 512)) -> np.ndarray
|
|
| 46 |
|
| 47 |
def get_face_mask(image):
|
| 48 |
# Store original image resolution
|
| 49 |
-
h
|
| 50 |
|
| 51 |
# Prepare image for inference
|
| 52 |
image_batch = prepare_image(image)
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
from tqdm import tqdm
|
| 9 |
import onnxruntime as ort
|
|
|
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
import cv2
|
| 12 |
+
from PIL import Image
|
| 13 |
|
| 14 |
|
| 15 |
model_path = hf_hub_download(repo_id="jbrownkramer/face-parsing", filename="resnet18.onnx")
|
|
|
|
| 17 |
session = ort.InferenceSession(model_path, providers=providers)
|
| 18 |
|
| 19 |
def prepare_image(image, input_size: Tuple[int, int] = (512, 512)) -> np.ndarray:
|
| 20 |
+
image_batch = np.array(image)
|
| 21 |
+
# Resize the image
|
| 22 |
+
resized_image = cv2.resize(image_batch, input_size, interpolation=cv2.INTER_LINEAR)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
image_batch = image_batch / 255.0
|
| 25 |
image_batch -= np.array([[[0.485, 0.456, 0.406]]])
|
| 26 |
image_batch /= np.array([[[0.229, 0.224, 0.225]]])
|
|
|
|
| 32 |
|
| 33 |
def get_face_mask(image):
|
| 34 |
# Store original image resolution
|
| 35 |
+
w,h = image.size
|
| 36 |
|
| 37 |
# Prepare image for inference
|
| 38 |
image_batch = prepare_image(image)
|
requirements.txt
CHANGED
|
@@ -3,9 +3,6 @@ onnxruntime
|
|
| 3 |
opencv-python
|
| 4 |
numpy
|
| 5 |
mediapipe
|
| 6 |
-
transformers
|
| 7 |
Pillow
|
| 8 |
-
matplotlib
|
| 9 |
huggingface-hub
|
| 10 |
-
|
| 11 |
-
torchvision
|
|
|
|
| 3 |
opencv-python
|
| 4 |
numpy
|
| 5 |
mediapipe
|
|
|
|
| 6 |
Pillow
|
|
|
|
| 7 |
huggingface-hub
|
| 8 |
+
scipy
|
|
|
zombie.py
CHANGED
|
@@ -3,9 +3,7 @@
|
|
| 3 |
|
| 4 |
from PIL import Image
|
| 5 |
import numpy as np
|
| 6 |
-
# import pickle
|
| 7 |
import align
|
| 8 |
-
# import time
|
| 9 |
import cv2
|
| 10 |
|
| 11 |
from faceparsing2 import get_face_mask
|
|
@@ -122,7 +120,6 @@ def ImageOpen(filepath):
|
|
| 122 |
|
| 123 |
def do_face(aligned, box, im_array, ort_session):
|
| 124 |
z = square_human_2_zombie_onnx(aligned,ort_session)
|
| 125 |
-
t1 = time.time()
|
| 126 |
|
| 127 |
t = cv2.getAffineTransform(np.array([[0,0],[0,511],[511,511]],dtype="float32"),box[:3,:].astype("float32"))
|
| 128 |
|
|
@@ -148,9 +145,7 @@ def make_faces_zombie(path, facenet, ort_session):
|
|
| 148 |
def make_faces_zombie_from_array(im_array_rgb, facenet, ort_session):
|
| 149 |
im_array_rgb = np.copy(im_array_rgb)
|
| 150 |
|
| 151 |
-
|
| 152 |
-
faces = align.aligns(Image.fromarray(im_array_rgb),enable_padding=True,output_size=512)
|
| 153 |
-
print("Find faces",time.time() - t0)
|
| 154 |
if faces is None:
|
| 155 |
faces = []
|
| 156 |
|
|
|
|
| 3 |
|
| 4 |
from PIL import Image
|
| 5 |
import numpy as np
|
|
|
|
| 6 |
import align
|
|
|
|
| 7 |
import cv2
|
| 8 |
|
| 9 |
from faceparsing2 import get_face_mask
|
|
|
|
| 120 |
|
| 121 |
def do_face(aligned, box, im_array, ort_session):
|
| 122 |
z = square_human_2_zombie_onnx(aligned,ort_session)
|
|
|
|
| 123 |
|
| 124 |
t = cv2.getAffineTransform(np.array([[0,0],[0,511],[511,511]],dtype="float32"),box[:3,:].astype("float32"))
|
| 125 |
|
|
|
|
| 145 |
def make_faces_zombie_from_array(im_array_rgb, facenet, ort_session):
|
| 146 |
im_array_rgb = np.copy(im_array_rgb)
|
| 147 |
|
| 148 |
+
faces = align.aligns(Image.fromarray(im_array_rgb),enable_padding=True,output_size=512,model_type="mediapipe")
|
|
|
|
|
|
|
| 149 |
if faces is None:
|
| 150 |
faces = []
|
| 151 |
|