import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks.python import vision
from mediapipe.python import solutions
from mediapipe.framework.formats import landmark_pb2
import sklearn
from pathlib import Path

text = Path("classes.txt").read_text(encoding="utf-8")
classes = {idx: val.strip() for idx, val in enumerate(text.split(","))}
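# NOTE: classes.txt is not shown here; it is assumed to hold comma-separated
# class names, e.g. "rock,paper,scissors", which the line above turns into
# {0: "rock", 1: "paper", 2: "scissors"}.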
MIN_CONFIDENCE = 0.5


def draw_landmarks(img: mp.Image, detection_results: vision.HandLandmarkerResult) -> np.ndarray:
    """Take an image and detection results, and draw landmarks
    for every detected hand. Return the annotated image."""
    # img.numpy_view() is read-only,
    # so we make a copy of it for use in drawing_utils
    annotated_img = np.copy(img.numpy_view())
    # get only the hand landmarks from the detection results
    hand_landmarks_list = detection_results.hand_landmarks
    # for every hand found
    for hand_landmarks in hand_landmarks_list:
        # convert every landmark to the pb2 format
        # so we can use it in drawing_utils
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend(
            [landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks]
        )
        # main drawing call
        solutions.drawing_utils.draw_landmarks(
            image=annotated_img,
            landmark_list=hand_landmarks_proto,
            connections=solutions.hands.HAND_CONNECTIONS,
            landmark_drawing_spec=solutions.drawing_styles.get_default_hand_landmarks_style(),
            connection_drawing_spec=solutions.drawing_styles.get_default_hand_connections_style(),
        )
    return annotated_img


def extract_hand_landmarks(detection_results: vision.HandLandmarkerResult) -> list:
    """Take detection results and return a list of
    21 (x, y) pairs for every detected hand."""
    # get only the hand landmarks from the detection results
    hand_landmarks_list = detection_results.hand_landmarks
    # initialize the resulting list
    data_xy = []
    # for every hand found
    for hand in hand_landmarks_list:
        # list for the current hand's landmarks
        hand_xy = []
        # extract x and y from every landmark
        for landmark in hand:
            hand_xy.append((landmark.x, landmark.y))
        # append the current hand's 21 (x, y) pairs
        data_xy.append(hand_xy)
    return data_xy
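
# For a single detected hand, extract_hand_landmarks() returns something like
# [[(0.41, 0.87), (0.45, 0.80), ...]] -- one inner list of 21 (x, y) pairs per
# hand, with coordinates normalized to [0, 1] (illustrative values only).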


def classify_and_draw(
    img: mp.Image, detection_results: vision.HandLandmarkerResult, model: sklearn.base.BaseEstimator
) -> np.ndarray:
    """Take an image, detection results and a model, and return the image
    with drawn landmarks and the predicted class name."""
    # first draw the landmarks
    img = draw_landmarks(img, detection_results)
    # extract 21 (x, y) pairs for every hand in the image
    data = extract_hand_landmarks(detection_results)
    # if at least one hand was found
    if data:
        for hand in data:
            # top-left corner coordinates for the text label
            x_min = int(min(i[0] for i in hand) * img.shape[1])
            y_min = int(min(i[1] for i in hand) * img.shape[0])
            # reshape 21 (x, y) pairs into 1 vector with 42 features
            hand = np.reshape(hand, (1, -1))  # (21, 2) -> (1, 42)
            # the prediction looks like [[0.1, 0.2, 0.7]] for 3 classes
            pred = model.predict_proba(hand)
            # if some class scored a probability greater than MIN_CONFIDENCE
            if any(pred[0] > MIN_CONFIDENCE):
                # take the index of the highest probability
                index = pred.argmax()
                # look up the class name in the dict at the top of this file
                class_name = classes[index]
            else:
                # no class passed the MIN_CONFIDENCE threshold
                class_name = "Unknown"
            cv2.putText(img, class_name, (x_min, y_min - 10), cv2.FONT_HERSHEY_COMPLEX, 1.5, (0, 0, 255), 2)
    return img
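

# --- Usage sketch (an assumption, not part of the original utils.py) ---
# A minimal webcam loop wiring the helpers above together. The paths
# "hand_landmarker.task" (the MediaPipe hand-landmarker model bundle) and
# "model.pkl" (a pickled sklearn classifier trained on the 42-feature vectors
# from extract_hand_landmarks) are hypothetical placeholders.
if __name__ == "__main__":
    import pickle

    from mediapipe.tasks.python import BaseOptions

    landmarker = vision.HandLandmarker.create_from_options(
        vision.HandLandmarkerOptions(
            base_options=BaseOptions(model_asset_path="hand_landmarker.task"),  # hypothetical path
            num_hands=2,
        )
    )
    with open("model.pkl", "rb") as f:  # hypothetical path to the trained classifier
        model = pickle.load(f)

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        # MediaPipe expects an RGB mp.Image; OpenCV captures BGR frames
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        results = landmarker.detect(mp_img)
        annotated = classify_and_draw(mp_img, results, model)
        # convert back to BGR for display with OpenCV
        cv2.imshow("hands", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()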