-
Notifications
You must be signed in to change notification settings - Fork 0
/
inference.py
executable file
·94 lines (77 loc) · 2.57 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import torch
import numpy as np
from network import C3D_model
import cv2
# Turn on cuDNN benchmark mode for the whole process (module-level side
# effect at import time). NOTE(review): this is normally only beneficial when
# input shapes stay constant between forward passes - confirm for new callers.
torch.backends.cudnn.benchmark = True
def CenterCrop(frame, size):
    """Crop the central ``size`` window out of an image array.

    Args:
        frame: Array-like image whose first two axes are (height, width).
            Any trailing axes (for example a channel axis) are kept as-is,
            so both 2-D grayscale and 3-D colour frames are accepted.
        size: ``(target_height, target_width)`` of the crop.

    Returns:
        A new ``np.uint8`` array whose first two axes have the requested
        size.

    Raises:
        ValueError: If the requested crop is larger than the frame in either
            dimension. Slicing with the resulting negative offsets would
            silently return a wrong or empty region.
    """
    frame = np.asarray(frame)
    h, w = frame.shape[:2]
    th, tw = size
    if th > h or tw > w:
        raise ValueError(
            "crop size (%d, %d) exceeds frame size (%d, %d)" % (th, tw, h, w)
        )
    # Offsets of the top-left corner that centre the window in the frame.
    x1 = int(round((w - tw) / 2.0))
    y1 = int(round((h - th) / 2.0))
    # Ellipsis instead of a hard-coded third axis keeps 2-D inputs working.
    return frame[y1 : y1 + th, x1 : x1 + tw, ...].astype(np.uint8)
def center_crop(frame):
    """Cut the fixed 112x112 window out of a frame and return it as uint8.

    The window covers rows 8..119 and columns 30..141, which is the centre of
    a 128x171 (height x width) frame. The offsets are hard-coded, so frames
    of any other size are not cropped around their centre.

    Args:
        frame: Image array indexable as ``frame[rows, cols, channels]``.

    Returns:
        A new ``np.uint8`` array holding the selected window.
    """
    row_window = slice(8, 120)
    col_window = slice(30, 142)
    window = frame[row_window, col_window, :]
    as_array = np.array(window)
    return as_array.astype(np.uint8)
def main(
    video="/Path/to/UCF-101/ApplyLipstick/v_ApplyLipstick_g04_c02.avi",
    checkpoint_path="run/run_1/models/C3D_ucf101_epoch-39.pth.tar",
    labels_path="./dataloaders/ucf_labels.txt",
):
    """Classify a video with a trained C3D model and display the result.

    Every frame is resized to 171x128, cropped with ``center_crop`` and
    mean-subtracted. Once 16 processed frames are buffered, the sliding
    window of the latest 16 frames is classified on each new frame, and the
    predicted class name and probability are drawn on the frame before it is
    shown in an OpenCV window.

    Args:
        video: Path of the video file to classify.
        checkpoint_path: Path of the checkpoint; it must contain a
            ``"state_dict"`` entry for a 101-class ``C3D_model.C3D``.
        labels_path: Text file with one label per line; the text after the
            last space on line ``i`` is shown as the name of class ``i``.

    Raises:
        RuntimeError: If the video cannot be opened.
    """
    from collections import deque

    clip_len = 16  # number of consecutive frames fed to the network at once

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    # The context manager closes the file; no explicit close() is needed.
    with open(labels_path, "r") as f:
        class_names = f.readlines()

    # init model
    model = C3D_model.C3D(num_classes=101)
    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(checkpoint["state_dict"])
    model.to(device)
    model.eval()

    # read video
    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError("Could not open video: %s" % video)

    # Values subtracted from the three channels of every cropped frame.
    # NOTE(review): presumably the training-set channel means - confirm.
    mean = np.array([[[90.0, 98.0, 102.0]]], dtype=np.float32)
    # Bounded deque: appending the 17th frame drops the oldest automatically.
    clip = deque(maxlen=clip_len)

    try:
        while True:
            retaining, frame = cap.read()
            # Stop on end of stream or on any failed/empty read so that a
            # None frame can never reach cv2.resize.
            if not retaining or frame is None:
                break

            cropped = center_crop(cv2.resize(frame, (171, 128)))
            clip.append(cropped.astype(np.float32) - mean)

            if len(clip) == clip_len:
                # (T, H, W, C) -> (1, T, H, W, C) -> (1, C, T, H, W)
                inputs = np.expand_dims(np.stack(clip), axis=0)
                inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
                inputs = torch.from_numpy(inputs).to(device)

                with torch.no_grad():
                    # Call the module itself (not .forward) so hooks run.
                    outputs = model(inputs)
                probs = torch.softmax(outputs, dim=1)
                best_prob, best_index = torch.max(probs, 1)
                label = int(best_index[0].item())

                cv2.putText(
                    frame,
                    class_names[label].split(" ")[-1].strip(),
                    (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6,
                    (0, 0, 255),
                    1,
                )
                cv2.putText(
                    frame,
                    "prob: %.4f" % best_prob[0].item(),
                    (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6,
                    (0, 0, 255),
                    1,
                )

            cv2.imshow("result", frame)
            cv2.waitKey(30)
    finally:
        # Release the capture and close the window even if inference fails.
        cap.release()
        cv2.destroyAllWindows()
# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()