Region-VLAD.py
# Implementation of the Region-VLAD VPR framework
# Ahmad Khaliq
from collections import defaultdict
import caffe
import pickle
import numpy as np
from skimage.measure import regionprops, label
import itertools
import time

# Load a pickled object (e.g. the visual vocabulary)
def load_obj(name):
    with open(name, 'rb') as f:
        return pickle.load(f)
# Convert a Caffe .binaryproto mean file into a numpy array
def binaryProto2npy(binaryProtoMeanFile):
    blob = caffe.proto.caffe_pb2.BlobProto()
    data = open(binaryProtoMeanFile, 'rb').read()
    blob.ParseFromString(data)
    arr = np.array(caffe.io.blobproto_to_array(blob))
    return arr[0]
# Extract the N most energetic ROIs from the conv layer
# imgConvFeat:      feature maps in (channels, H, W) order
# imgLocalConvFeat: the same activations stacked as (H, W, channels) local descriptors
def getROIs(imgConvFeat, imgLocalConvFeat, img):
    clustersEnergies_Ej = []
    clustersBoxes = []
    for featuremap in imgConvFeat:
        # Connected regions of each feature map act as candidate ROIs;
        # a region's energy is its mean activation
        clusters = regionprops(label(featuremap), intensity_image=featuremap, cache=False)
        clustersBoxes.append(list(cluster.bbox for cluster in clusters))
        clustersEnergies_Ej.append(list(cluster.mean_intensity for cluster in clusters))
    # Make a flat list of ROIs with their bounding boxes
    clustersBoxes = list(itertools.chain.from_iterable(clustersBoxes))
    clustersEnergies_Ej = list(itertools.chain.from_iterable(clustersEnergies_Ej))
    # Sort the ROIs by energy and pick the top N with their bounding boxes
    allROIs = sorted(clustersEnergies_Ej, reverse=True)[:N]
    allROI_Box = [clustersBoxes[clustersEnergies_Ej.index(i)] for i in allROIs]
    aggregatedNROIs = np.zeros((N, imgLocalConvFeat.shape[2]))
    # Sum the local descriptors lying under each of the N ROIs
    for ROI in range(len(allROI_Box)):
        minRow, minCol, maxRow, maxCol = allROI_Box[ROI]
        aggregatedNROIs[ROI, :] = np.sum(imgLocalConvFeat[minRow:maxRow, minCol:maxCol], axis=(0, 1))
    # N x K dimensional ROI descriptors
    return np.asarray(aggregatedNROIs)
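# Note: getROIs returns an N x K matrix, where K is the channel depth of the chosen
# conv layer (384 for conv3 of the standard AlexNet architecture used here); each row
# aggregates the local descriptors that fall inside one energetic region.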

# Retrieve the VLAD representation of the ROI descriptors X
# against a pre-trained visual dictionary (a fitted clustering model)
def getVLAD(X, visualDictionary):
    predictedLabels = visualDictionary.predict(X)
    centers = visualDictionary.cluster_centers_
    k = visualDictionary.n_clusters
    m, d = X.shape
    Vlad = np.zeros([k, d])
    # Accumulate residuals for every cluster (visual word)
    for i in range(k):
        # Only if at least one descriptor was assigned to that cluster
        if np.sum(predictedLabels == i) > 0:
            Vlad[i] = np.sum(X[predictedLabels == i, :] - centers[i], axis=0)
    # Power (signed square-root) normalisation followed by L2 normalisation
    Vlad = Vlad.flatten()
    Vlad = np.sign(Vlad) * np.sqrt(np.abs(Vlad))
    Vlad = Vlad / np.sqrt(np.dot(Vlad, Vlad))
    return Vlad.reshape(k, d)
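
# Optional sanity check (an illustrative sketch, not part of the original pipeline):
# it builds a tiny random vocabulary with sklearn KMeans and verifies that getVLAD
# returns a k x d matrix whose flattened form has unit L2 norm. The flag name, the
# sizes and the sklearn usage below are assumptions added purely for illustration.
RUN_VLAD_SANITY_CHECK = False
if RUN_VLAD_SANITY_CHECK:
    from sklearn.cluster import KMeans
    rng = np.random.RandomState(0)
    dummyDescriptors = rng.rand(20, 8)                  # 20 fake ROI descriptors, 8-D
    dummyVocab = KMeans(n_clusters=4, random_state=0).fit(dummyDescriptors)
    dummyVLAD = getVLAD(dummyDescriptors, dummyVocab)
    print("VLAD shape:", dummyVLAD.shape)               # expected: (4, 8)
    print("L2 norm:", np.linalg.norm(dummyVLAD))        # expected: ~1.0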

# Load prototxt, model and mean file
protxt = "../AlexnetPlaces365/deploy_alexnet_places365.prototxt"
model = "../AlexnetPlaces365/alexnet_places365.caffemodel"
mean = "../AlexnetPlaces365/places365CNN_mean.binaryproto"
# Number of ROIs
N = 400
# Convolutional layer used for the local descriptors
layer = 'conv3'
Features, StackedFeatures = defaultdict(list), defaultdict(list)
# Use GPU?
set_gpu = True
gpu_id = 0
totalT = 0
if set_gpu:
    caffe.set_mode_gpu()
    caffe.set_device(gpu_id)
else:
    caffe.set_mode_cpu()
net = caffe.Net(protxt, model, caffe.TEST)
batch_size = 2
inputSize = net.blobs['data'].shape[2]
net.blobs['data'].reshape(batch_size, 3, inputSize, inputSize)
# Test image
testImg = "../berlin_A100/berlin_A100_1/04FQ4NG5mYXOdbENCPup8w-640-00000.jpg"
# Reference image
refImg = "../berlin_A100/berlin_A100_2/HRdLsyO_U_0WmgrIZCKZ0g-640-00266.jpg"
image_paths_list = [testImg, refImg]
# Configuration 1
if N == 200:
    V = 128
    vocab = load_obj("../Vocabulary_100_200_300.pkl")
# Configuration 2
else:
    V = 256
    vocab = load_obj("../Vocabulary_400.pkl")
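# The loaded vocabulary is assumed to be a nested dictionary indexed as
# vocab[N][V][layer], yielding a pre-trained visual dictionary that exposes
# predict(), cluster_centers_ and n_clusters, as required by getVLAD below.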
# Load images
images_loaded_by_caffe = [caffe.io.load_image(im) for im in image_paths_list]
images_loaded_by_caffe = np.array(images_loaded_by_caffe)
# Set up the Caffe preprocessing transformer:
# mean subtraction, HWC -> CHW, RGB -> BGR, and [0,1] -> [0,255] scaling
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
mean_file = binaryProto2npy(mean)
transformer.set_mean('data', mean_file.mean(1).mean(1))
transformer.set_transpose('data', (2, 0, 1))
transformer.set_channel_swap('data', (2, 1, 0))
transformer.set_raw_scale('data', 255)
data_blob_index = range(batch_size)
net.blobs['data'].data[data_blob_index] = \
    [transformer.preprocess('data', img) for img in images_loaded_by_caffe]
print("*****Region-VLAD*****")
print("*****N: %d, V=%d*****" %(N,V))
ts=time.time()
# Forward Pass
res = net.forward()
te=time.time()
#print("Fwd Pass Time: %f ms" %((te-ts)*1000/float(batch_size)))
totalT +=(te-ts)*1000/float(batch_size)
ts=time.time()
# Stack the activations of feature maps to make local descriptors
Features[layer] = np.array(net.blobs[layer].data[data_blob_index].copy())
StackedFeatures[layer]=Features[layer].transpose(0,2,3,1)
# Retrieve N ROIs for test and ref images
testROIs= getROIs(Features[layer][0],StackedFeatures[layer][0],testImg)
refROIs= getROIs(Features[layer][1],StackedFeatures[layer][1],refImg)
te=time.time()
#print("Encoding Time: %f ms" %((te-ts)*1000/float(batch_size)))
totalT +=(te-ts)*1000/float(batch_size)
vocabulary = vocab[N][V][layer]
ts=time.time()
# Retrieve VLAD descriptors using ROIs and vocabulary
testVLAD= getVLAD(testROIs,vocabulary)
refVLAD= getVLAD(refROIs,vocabulary)
te=time.time()
#print("VLAD Encoding Time: %f ms" %((te-ts)*1000/float(batch_size)))
totalT +=(te-ts)*1000/float(batch_size)
ts=time.time()
# Dot Prouct of test and ref VLADs
cosineMatchScore = np.sum(np.einsum('ij,ij->i', testVLAD, refVLAD))
te=time.time()
#print("VLAD Matching Time: %f ms" %((te-ts)*1000))
matchTime=(te-ts)*1000
print ("Score: %f" %cosineMatchScore)
#print("Query time til VLAD encoding :%f ms" %totalT)
#print("Retrieval Time 1 Test vs 1 Ref :%f ms" %(totalT+matchTime))
#print("Retrieval Time 1 Test vs 750 Ref :%f ms" %(totalT+750*matchTime))