prepro_feats.py
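"""Extract CNN features from videos.

For every .mp4 under --video_path, dump frames with ffmpeg, sample
--n_frame_steps frames evenly across the video, run them through a
pretrained CNN (resnet152, inception_v3, or inception_v4) with its final
classifier removed, and save the resulting features to --output_dir as
<video_id>.npy.
"""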
import shutil
import subprocess
import glob
from tqdm import tqdm
import numpy as np
import os
import argparse

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

import pretrainedmodels
from pretrainedmodels import utils

# default input size (channels, height, width); overridden per model in __main__
C, H, W = 3, 224, 224

def extract_frames(video, dst):
    with open(os.devnull, "w") as ffmpeg_log:
        if os.path.exists(dst):
            print(" cleanup: " + dst + "/")
            shutil.rmtree(dst)
        os.makedirs(dst)
        video_to_frames_command = ["ffmpeg",
                                   '-y',                    # overwrite output files if they exist
                                   '-i', video,             # input file
                                   '-vf', "scale=400:300",  # resize frames
                                   '-qscale:v', "2",        # JPEG quality
                                   '{0}/%06d.jpg'.format(dst)]
        subprocess.call(video_to_frames_command,
                        stdout=ffmpeg_log, stderr=ffmpeg_log)

def extract_feats(params, model, load_image_fn):
    global C, H, W
    model.eval()

    dir_fc = params['output_dir']
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    print("save video feats to %s" % (dir_fc))
    video_list = glob.glob(os.path.join(params['video_path'], '*.mp4'))
    for video in tqdm(video_list):
        video_id = video.split("/")[-1].split(".")[0]
        dst = params['model'] + '_' + video_id
        extract_frames(video, dst)

        image_list = sorted(glob.glob(os.path.join(dst, '*.jpg')))
        # sample n_frame_steps frames, evenly spaced over the video
        samples = np.round(np.linspace(
            0, len(image_list) - 1, params['n_frame_steps']))
        image_list = [image_list[int(sample)] for sample in samples]
        images = torch.zeros((len(image_list), C, H, W))
        for iImg in range(len(image_list)):
            img = load_image_fn(image_list[iImg])
            images[iImg] = img
        fc_feats = model(Variable(images).cuda()).squeeze()
        img_feats = fc_feats.data.cpu().numpy()
        # save the CNN features, one .npy file per video
        outfile = os.path.join(dir_fc, video_id + '.npy')
        np.save(outfile, img_feats)
        # cleanup extracted frames
        shutil.rmtree(dst)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu", dest='gpu', type=str, default='0',
                        help='Set CUDA_VISIBLE_DEVICES environment variable, optional')
    parser.add_argument("--output_dir", dest='output_dir', type=str,
                        default='data/feats/resnet152', help='directory to store features')
    parser.add_argument("--n_frame_steps", dest='n_frame_steps', type=int, default=40,
                        help='how many frames to sample per video')
    parser.add_argument("--video_path", dest='video_path', type=str,
                        default='data/train-video', help='path to video dataset')
    parser.add_argument("--model", dest="model", type=str, default='resnet152',
                        help='the CNN model to use to extract feats')
    parser.add_argument("--saved_model", dest="saved_model", type=str, default='',
                        help='path to a pretrained CNN checkpoint to load, optional')
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    params = vars(args)

    if params['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif params['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(
            num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        # bail out instead of falling through with an undefined model
        raise ValueError("doesn't support %s" % (params['model']))

    # drop the classifier so the model outputs pooled features
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    if params['saved_model'] != '':
        model.load_state_dict(torch.load(params['saved_model']), strict=False)
    model = model.cuda()
    extract_feats(params, model, load_image_fn)
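
# Example invocation (a sketch matching the argparse defaults above; the data
# paths are assumptions about the local layout):
#
#   python prepro_feats.py --gpu 0 --model resnet152 \
#       --video_path data/train-video --output_dir data/feats/resnet152
#
# This writes one <video_id>.npy of shape (n_frame_steps, feat_dim) per video
# into --output_dir.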