forked from jesu9/VGGFeatExtract
-
Notifications
You must be signed in to change notification settings - Fork 0
/
caffe_io.py
79 lines (69 loc) · 2.29 KB
/
caffe_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np
import cv2
import hdf5storage
# This should be written as an IO module
# Load image, preprocess image
# Load video
# Dump to HDF5 for caffe training
def load_image(img_name):
    """Read the image file ``img_name`` with ``cv2.imread`` and return the result.

    Per the original author's note, pixels come back in BGR channel order
    with values in the 0-255 range.
    """
    image = cv2.imread(img_name)
    return image
def load_video(vid_name, sample_per_sec=1):
    """Open a video with OpenCV and return a time-sub-sampled list of frames.

    The first frame is always kept, followed by every ``sample_step``-th
    frame, where ``sample_step = ceil(fps / sample_per_sec)``.

    Args:
        vid_name: Value handed to ``cv2.VideoCapture`` (normally a file path).
        sample_per_sec: Desired number of frames to keep per second of video.
            Values at or above the video's frame rate keep every frame.

    Returns:
        A list of the frames returned by ``cap.retrieve()``. The list is
        empty when the video cannot be opened (a message is printed in that
        case) or when no frame could be grabbed.

    Raises:
        ValueError: If ``sample_per_sec`` is not positive.
    """
    cap = cv2.VideoCapture(vid_name)
    frames = []
    try:
        if not cap.isOpened():
            # %-formatting prints the same text under Python 2 and Python 3.
            print('Cannot open %s' % vid_name)
            return frames
        if sample_per_sec <= 0:
            raise ValueError('sample_per_sec must be positive, got %r' % (sample_per_sec,))

        # OpenCV 3+ exposes the property id as cv2.CAP_PROP_FPS; OpenCV 2.x
        # only has it under the legacy cv2.cv namespace.
        fps_prop = getattr(cv2, 'CAP_PROP_FPS', None)
        if fps_prop is None:
            fps_prop = cv2.cv.CV_CAP_PROP_FPS
        fps = cap.get(fps_prop)

        if fps > 0:
            sample_step = int(np.ceil(float(fps) / sample_per_sec))
        else:
            # Frame rate unknown (reported as 0 or NaN): time-based sampling
            # is impossible, so keep every frame instead of returning nothing.
            sample_step = 1

        # Zero-based counter: frames 0, step, 2*step, ... are kept. This also
        # works for step == 1, where the former ``ind % step == 1`` test with
        # a one-based counter never matched and dropped every frame.
        index = 0
        while cap.grab():
            if index % sample_step == 0:
                flag, frame = cap.retrieve()
                if flag:
                    frames.append(frame)
            index += 1
    finally:
        # Always hand the capture back, even if decoding raised.
        cap.release()
    return frames
def save_matrix(mat, output_path):
    """Write ``mat`` to ``output_path`` using ``hdf5storage.savemat``.

    ``mat`` is forwarded unchanged as the second positional argument of
    ``savemat``.
    NOTE(review): presumably a dict mapping variable names to arrays --
    confirm against callers.
    """
    hdf5storage.savemat(output_path, mat)
# Default parameters are for VGG net
# Input: Height x Width x Channel
# Output: #Sample x Channel x Height x Width
def transform_image(img, over_sample=False, mean_pix=(103.939, 116.779, 123.68), image_dim=256, crop_dim=224):
    """Resize, crop and mean-subtract one image for network input.

    Steps: expand single-channel input to three channels, resize so the
    shorter side equals ``image_dim`` (aspect ratio kept), convert to
    float32, take ``crop_dim`` x ``crop_dim`` crops, subtract the per-channel
    mean, and move the channel axis in front of the spatial axes.

    Args:
        img: Image array laid out as Height x Width [x Channel].
        over_sample: When false, only the center crop is produced. When true,
            ten crops are produced in this order: center, top-left,
            top-right, bottom-left, bottom-right, followed by the horizontal
            mirror of each of those five in the same order.
        mean_pix: Per-channel values subtracted from channels 0, 1 and 2.
        image_dim: Length of the shorter image side after resizing.
        crop_dim: Side length of each square crop.

    Returns:
        A float32 array of shape (#crops, 3, crop_dim, crop_dim), where
        #crops is 10 with ``over_sample`` and 1 otherwise.

    Raises:
        ValueError: If ``crop_dim`` is larger than ``image_dim`` (the crop
            would not fit inside the resized image).
    """
    if crop_dim > image_dim:
        raise ValueError('crop_dim (%s) must not exceed image_dim (%s)' % (crop_dim, image_dim))

    # Expand grayscale input to three channels. cv2.COLOR_GRAY2BGR is used
    # instead of the cv2.cv constant, which no longer exists in OpenCV 3+.
    if img.ndim < 3 or img.shape[2] == 1:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    # Resize so that the shorter side becomes image_dim.
    # Note: OpenCV expects dsize as (width, height).
    height, width = img.shape[:2]
    if height < width:
        dsize = (int(np.floor(float(image_dim) * width / height)), image_dim)
    else:
        dsize = (image_dim, int(np.floor(float(image_dim) * height / width)))
    img = cv2.resize(img, dsize, interpolation=cv2.INTER_CUBIC)
    img = img.astype(np.float32, copy=False)

    # Largest valid top-left offsets for a crop, and the centered offsets.
    # Integer floor division keeps them ints; float offsets are rejected as
    # slice indices by current NumPy.
    max_y = img.shape[0] - crop_dim
    max_x = img.shape[1] - crop_dim
    center_y = max_y // 2
    center_x = max_x // 2

    n_crops = 10 if over_sample else 1
    imgs = np.zeros((n_crops, crop_dim, crop_dim, 3), dtype=np.float32)
    imgs[0] = img[center_y:center_y + crop_dim, center_x:center_x + crop_dim, :]
    if over_sample:
        # Slots 1-4: the four corner crops, rows first then columns.
        corners = [(y, x) for y in (0, max_y) for x in (0, max_x)]
        for slot, (y, x) in enumerate(corners, 1):
            imgs[slot] = img[y:y + crop_dim, x:x + crop_dim, :]
        # Slots 5-9: horizontal mirrors (width axis reversed) of slots 0-4.
        imgs[5:] = imgs[:5, :, ::-1, :]

    # Subtract the per-channel mean; broadcasting runs over the last axis.
    imgs -= np.asarray(mean_pix, dtype=np.float32)[:3]

    # Reorder axes: (#crops, H, W, C) -> (#crops, C, H, W).
    return np.rollaxis(imgs, 3, 1)