-
Notifications
You must be signed in to change notification settings - Fork 0
/
bens_pipline.py
370 lines (298 loc) · 14.9 KB
/
bens_pipline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
import cv2
import torch
import numpy as np
import bodypix
import pyttsx3
from models import *
from utils import *
# ---------------------------------------------------------------------------
# Inference configuration
# ---------------------------------------------------------------------------
face_th_rate = 0.05
# Thresholds handed to binary_erosion_dilation() for the predicted masks.
# NOTE(review): presumably one value per entry of `categories`, in the same
# order -- confirm against binary_erosion_dilation().
thresholds_mask = [
    0.5, 0.75, 0.25, 0.5,  # 0.5, 0.5, 0.5, 0.5,
]
# Per-class presence thresholds; process_head() compares the i-th classifier
# output against the i-th value.
thresholds_pred = [
    0.6, 0.8, 0.1, 0.5,
]
erosion_iterations = 1
dilation_iterations = 1
colour_distance_rate = 1.2
categories = ['hair', 'hat', 'glasses', 'face',]

# ---------------------------------------------------------------------------
# Load model
# ---------------------------------------------------------------------------
model_dir = "saved-model"
cat_layers = 4
# CHANGE THESE (alternative architectures are kept as commented-out options):
# segment_model = DeepLabV3PlusMobileNetV3(num_classes=4) # ['hair', 'hat', 'eye_g', 'skin', ] 'brow', 'eye', 'mouth', 'nose', ]
segment_model = UNetWithResnetEncoder(num_classes=4)
# predict_model = MultiLabelMobileNetV3Large(4, 7) # 'hair', 'hat', 'glasses', 'face', ; first three with colours, rgb
predict_model = MultiLabelResNet(num_labels=4, input_channels=7)
model = CombinedModel(segment_model, predict_model, cat_layers=cat_layers)

# Check for the latest saved model; inference is pointless without weights,
# so a missing checkpoint is a hard error.
latest_checkpoint = find_latest_checkpoint(model_dir)
if not latest_checkpoint:
    raise RuntimeError("No save model discovered under %s" % model_dir)
print(f"Loading model from {latest_checkpoint}")
model, _, start_epoch, best_val_loss = load_model(model, None, path=latest_checkpoint, cpu_only=True)
start_epoch += 1
# Switch to inference mode only AFTER loading: load_model() rebinds `model`,
# so calling eval() beforehand does not guarantee the mode of the object that
# is actually used for prediction.
model.eval()

# Initialize BodyPix detector
bodypix_detector = bodypix.BodyPixDetector()

# Prepare hair colour table
image_dict = load_images_to_dict('legacy/hair_colours')
hair_colour_table = generate_colour_table(image_dict, SPESIFIC_COLOURS)
def pad_image_to_even_dims(image):
    """
    Zero-pad an image so that its height and width are both even.

    At most one row is appended at the bottom and one column on the right;
    any axes after the first two (e.g. channels) are left unpadded.

    Args:
    - image (np.ndarray | None): Array whose first two axes are height and
      width (2-D grayscale or 3-D with a trailing channel axis). ``None`` is
      passed through so callers can forward a "nothing detected" frame.

    Returns:
    - np.ndarray | None: A padded copy of the image, or ``None`` when the
      input was ``None``.
    """
    if image is None:
        return None
    height, width = image.shape[:2]
    # An odd dimension has remainder 1, which is exactly the padding it needs.
    pad_width = [(0, height % 2), (0, width % 2)]
    # Leave every remaining axis untouched.
    pad_width.extend((0, 0) for _ in range(image.ndim - 2))
    return np.pad(image, pad_width, mode='constant', constant_values=0)
def process_head(head_frame, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred):
    """
    Processes the head frame to extract class counts and color information for head-related classes.

    Args:
    - head_frame (np.ndarray | None): The head frame extracted by the BodyPix model. It is
      permuted with (2, 0, 1) before inference, so a 3-D channel-last array is expected.
      ``None`` yields the all-zero / empty result.
    - model: A PyTorch model instance; called with the frame tensor, it must return
      ``(masks, class_scores)``.
    - thresholds_mask, erosion_iterations, dilation_iterations: Forwarded to
      ``binary_erosion_dilation`` to clean up the predicted masks.
    - thresholds_pred: Per-class thresholds; class ``i`` counts as present when its score
      exceeds ``thresholds_pred[i]``.

    Returns:
    - Tuple[dict, dict]: A tuple containing two dictionaries:
        - head_class_count: 0/1 presence flag for 'hair', 'hat' and 'glasses'.
        - head_class_colours: For each of those classes, a dict mapping a colour label to a
          list of the values reported by ``count_colours_in_masked_area``.
    """
    head_keys = ('hair', 'hat', 'glasses')
    head_class_count = {k: 0 for k in head_keys}
    head_class_colours = {k: {} for k in head_keys}
    if head_frame is None:
        return head_class_count, head_class_colours

    # Debug preview only. Deliberately best-effort: a display failure (e.g. no
    # GUI backend available) must never abort the prediction itself.
    try:
        cv2.imshow('Head Frame', cv2.cvtColor(head_frame, cv2.COLOR_RGB2BGR))
    except Exception:
        pass

    # Convert head frame to a batched, channel-first float tensor scaled to [0, 1]
    head_frame_tensor = torch.from_numpy(head_frame).permute(2, 0, 1).unsqueeze(0).float() / 255.0

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        masks_batch_pred, pred_classes = model(head_frame_tensor)
        # Apply binary erosion and dilation to the masks
        processed_masks = binary_erosion_dilation(
            masks_batch_pred, thresholds=thresholds_mask,
            erosion_iterations=erosion_iterations, dilation_iterations=dilation_iterations
        )
    masks = processed_masks.detach().squeeze(0).numpy().astype(np.uint8)
    pred_classes = pred_classes.detach().squeeze(0).numpy()

    # Update class count: only the first three outputs map to head_keys.
    for k, score, threshold in zip(head_keys, pred_classes, thresholds_pred):
        head_class_count[k] = int(score.item() > threshold)

    # Update class colours. zip() stops at the shortest input, so all three
    # leading masks must be offered here; offering only two would silently
    # drop the colours for 'glasses'.
    colour_maps = (SPESIFIC_COLOURS, DETAILED_COLOURS, DETAILED_COLOURS)
    for k, each_mask, c_map in zip(head_keys, masks, colour_maps):
        colours = count_colours_in_masked_area(head_frame, each_mask, c_map, sort=True)[1]
        for colour in colours:
            head_class_colours[k].setdefault(colour[0], []).append(colour[1])
    return head_class_count, head_class_colours
def process_cloth(full_frame, torso_mask):
    """
    Derive the cloth presence flag and cloth colour information for one frame.

    Args:
    - full_frame (np.ndarray): The full original frame from the video source.
    - torso_mask (np.ndarray | None): The torso mask extracted by the BodyPix model.

    Returns:
    - Tuple[dict, dict]: A tuple containing two dictionaries:
        - cloth_class_count: ``{'cloth': 0 or 1}``; 1 when the mask sums to at least 50.
        - cloth_class_colours: ``{'cloth': {...}}`` mapping a colour label to a list of the
          values reported by ``count_colours_in_masked_area`` (empty when no cloth is seen).
    """
    presence = {'cloth': 0}
    palette = {'cloth': {}}

    # A missing or nearly empty mask means no cloth was detected.
    cloth_visible = torso_mask is not None and np.sum(torso_mask) >= 50
    if not cloth_visible:
        return presence, palette

    presence['cloth'] = 1
    cloth_palette = palette['cloth']
    # Element [1] of the helper's result is the list of colour entries.
    for entry in count_colours_in_masked_area(full_frame, torso_mask, DETAILED_COLOURS, sort=True)[1]:
        cloth_palette.setdefault(entry[0], []).append(entry[1])
    return presence, palette
# You can use this function directly for single-frame prediction.
def predict_frame(head_frame, torso_frame, full_frame, head_mask, torso_mask, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred):
    """
    Predicts classes and color information for a single processed video frame.

    Args:
    - head_frame (np.ndarray | None): The head frame extracted by the BodyPix model.
      ``None`` (no head found) is tolerated and yields no head classes.
    - torso_frame (np.ndarray): The torso frame extracted by the BodyPix model. Accepted for
      interface compatibility; not used by the prediction.
    - full_frame (np.ndarray): The full original frame from the video source.
    - head_mask (np.ndarray): The head mask extracted by the BodyPix model. Accepted for
      interface compatibility; not used by the prediction.
    - torso_mask (np.ndarray | None): The torso mask extracted by the BodyPix model.
    - model: A PyTorch model instance for classifying and predicting masks for head features.
    - thresholds_mask, erosion_iterations, dilation_iterations: Thresholds and iteration counts for binary erosion and dilation.
    - thresholds_pred: A list of prediction thresholds.

    Returns:
    - Tuple[dict, dict]: A tuple containing:
        - class_pred: Booleans for 'hair', 'hat', 'glasses' and 'cloth'.
        - colour_pred: For the same keys, the colour dictionaries produced by
          ``process_head`` / ``process_cloth``.
    """
    # Only pad a real frame: process_head() handles a missing head itself, so
    # padding must not fail before that guard is reached. The torso frame is
    # never consumed below, hence it is not padded at all.
    if head_frame is not None:
        head_frame = pad_image_to_even_dims(head_frame)

    # Process head and cloth separately for the single frame
    head_class_count, head_class_colours = process_head(head_frame, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred)
    cloth_class_count, cloth_class_colours = process_cloth(full_frame, torso_mask)

    # Merge both partial results; head keys first, then 'cloth'.
    class_count = {**head_class_count, 'cloth': cloth_class_count['cloth']}
    class_colours = {**head_class_colours, 'cloth': cloth_class_colours['cloth']}

    # Compute final class predictions and colors for the single frame
    class_pred = {k: bool(v) for k, v in class_count.items()}
    colour_pred = dict(class_colours)
    return class_pred, colour_pred
# If multiple frames can be provided (see the __main__ section), this should work better than the single-frame version.
def predict_frames(head_frames, torso_frames, full_frames, head_masks, torso_masks, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred, SPESIFIC_COLOURS):
    """
    Predicts classes and color information for a sequence of processed video frames.

    Per-frame results are accumulated; a class is reported as present when it was
    detected in at least half of the frames.

    Args:
    - head_frames (list[np.ndarray | None]): List of head frames extracted by the BodyPix model.
    - torso_frames (list[np.ndarray]): List of body frames extracted by the BodyPix model.
      Only used to pair frames up; the content is not consumed.
    - full_frames (list[np.ndarray]): List of full original frames from the video source.
    - head_masks (list[np.ndarray]): List of head masks extracted by the BodyPix model.
      Only used to pair frames up; the content is not consumed.
    - torso_masks (list[np.ndarray | None]): List of torso masks extracted by the BodyPix model.
    - model: A PyTorch model instance for classifying and predicting masks for head features.
    - thresholds_mask, erosion_iterations, dilation_iterations: Thresholds and iteration counts for binary erosion and dilation.
    - thresholds_pred: A list of prediction thresholds.
    - SPESIFIC_COLOURS: A dictionary of specific colors. Kept for interface compatibility;
      not read inside this function.

    Returns:
    - Tuple[dict, dict]: A tuple containing:
        - class_pred: Booleans for 'hair', 'hat', 'glasses' and 'cloth'. All False when no
          frames were supplied.
        - colour_pred: The result of ``average_colours_by_label`` over the accumulated
          counts and colours.
    """
    labels = ('hair', 'hat', 'glasses', 'cloth')
    total_class_count = {k: [] for k in labels}
    total_class_colours = {k: {} for k in labels}

    # zip() stops at the shortest list, so the five inputs are consumed in lockstep.
    for head_frame, _torso_frame, full_frame, _head_mask, torso_mask in zip(head_frames, torso_frames, full_frames, head_masks, torso_masks):
        # Only pad a real frame: process_head() handles a missing head itself.
        # The torso frame is never consumed, hence it is not padded.
        if head_frame is not None:
            head_frame = pad_image_to_even_dims(head_frame)

        # Process head and cloth separately
        head_class_count, head_class_colours = process_head(head_frame, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred)
        cloth_class_count, cloth_class_colours = process_cloth(full_frame, torso_mask)

        # Accumulate class counts and colours
        frame_counts = {**head_class_count, **cloth_class_count}
        frame_colours = {**head_class_colours, **cloth_class_colours}
        for k, present in frame_counts.items():
            total_class_count[k].append(present)
            for colour, values in frame_colours.get(k, {}).items():
                # extend() into a list owned by the accumulator so the
                # per-frame lists are never aliased or mutated.
                total_class_colours[k].setdefault(colour, []).extend(values)

    # Compute final class predictions and colors. An empty vote list must not
    # count as a majority (0 >= 0 would otherwise report every class present).
    class_pred = {k: bool(v) and sum(v) >= len(v) / 2 for k, v in total_class_count.items()}
    colour_pred = average_colours_by_label(total_class_count, total_class_colours)
    return class_pred, colour_pred
def speak(words: str):
    """Speak *words* aloud through a pyttsx3 text-to-speech engine."""
    tts = pyttsx3.init()
    tts.say(words)
    # Process the queued utterance before returning to the caller.
    tts.runAndWait()
if __name__ == "__main__":
    # Live demo: grab webcam frames, run BodyPix on each, and every
    # FRAMES_PER_BATCH frames announce the detected classes and colours.
    FRAMES_PER_BATCH = 7

    cap = cv2.VideoCapture(0)
    _count = 56  # Skip initial frames for camera stabilization
    head_frames, torso_frames, full_frames, head_masks, torso_masks = [], [], [], [], []
    speak("Starting the detection process.")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if _count > 0:
                _count -= 1
                continue
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # BodyPix detection
            head_frame, torso_frame, head_mask, torso_mask = bodypix_detector.detect(frame_rgb, return_masks=True)
            # Drop the trailing singleton axis of each mask. Guarded because
            # downstream code treats a missing mask as "nothing detected".
            if head_mask is not None:
                head_mask = np.squeeze(head_mask, -1)
            if torso_mask is not None:
                torso_mask = np.squeeze(torso_mask, -1)
            head_frames.append(head_frame)
            torso_frames.append(torso_frame)
            full_frames.append(frame_rgb)
            head_masks.append(head_mask)
            torso_masks.append(torso_mask)

            # Wait for a full batch; triggering on a frame counter that starts
            # at 0 would analyse a single frame on the very first pass.
            if len(head_frames) < FRAMES_PER_BATCH:
                continue

            print("Analyzing frames...")
            speak("Analyzing frames...")
            class_pred, class_colours = predict_frames(head_frames, torso_frames, full_frames, head_masks, torso_masks, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred, SPESIFIC_COLOURS)
            print("Predicted Classes:", class_pred)

            # Process color distributions for hair
            sorted_distances_hair = compare_colour_distributions(class_colours['hair'], hair_colour_table)[:3]
            print("Top Possible Hair Colours:", sorted_distances_hair)

            # Extract top colours for hat and cloth
            class_colours['hat'] = extract_top_colours_by_threshold(class_colours['hat'], 0.66)
            class_colours['cloth'] = extract_top_colours_by_threshold(class_colours['cloth'], 0.66)
            print("Major Hat Colours:", class_colours['hat'])
            print("Major Cloth Colours:", class_colours['cloth'])

            # Announce and report detected classes and colours
            speak("I can see...")
            detected = [k for k, present in class_pred.items() if present]
            for k in detected:
                speak(k)
            if not detected:
                speak("Nothing.")
            if class_pred['hat']:
                hat_colour = str(class_colours['hat'][0][0]) if class_colours['hat'] else "Unknown"
                print("Hat is likely to be:", hat_colour)
                speak(f"Hat is likely to be: {hat_colour}")
            if class_pred['hair']:
                hair_colour = str(sorted_distances_hair[0][0]) if sorted_distances_hair else "Unknown"
                print("Hair is likely to be:", hair_colour)
                speak(f"Hair is likely to be: {hair_colour}")
            if class_pred['cloth']:
                cloth_colour = str(class_colours['cloth'][0][0]) if class_colours['cloth'] else "Unknown"
                print("Cloth is likely to be:", cloth_colour)
                speak(f"Cloth is likely to be: {cloth_colour}")

            # Reset ALL five buffers for the next set of frames; leaving one
            # out lets it grow without bound and pairs stale entries with
            # fresh frames in predict_frames().
            head_frames, torso_frames, full_frames, head_masks, torso_masks = [], [], [], [], []
    finally:
        # Free the camera even if detection or speech raises.
        cap.release()