-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_vocabulary.py
79 lines (76 loc) · 5.11 KB
/
build_vocabulary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from PIL import Image
import numpy as np
from cyvlfeat.sift.dsift import dsift
from cyvlfeat.kmeans import kmeans
from time import time
from tqdm import tqdm
#This function will sample SIFT descriptors from the training images,
#cluster them with kmeans, and then return the cluster centers.
def build_vocabulary(image_paths, vocab_size):
    """Build a visual-word vocabulary by clustering dense SIFT descriptors.

    Samples dense SIFT descriptors from each training image, stacks them
    into one large matrix, and clusters them with k-means. The resulting
    cluster centroids are the visual-word vocabulary.

    Args:
        image_paths: list of file paths to the training images.
        vocab_size: number of k-means clusters (visual words) desired.

    Returns:
        numpy.ndarray of shape (vocab_size, 128): the k-means cluster
        centroids, i.e. the visual-word vocabulary.
    """
    features = []
    for path in tqdm(image_paths):
        # dsift expects a 2-D (grayscale) float array. convert('L') makes
        # RGB/RGBA inputs work and is a no-op for already-grayscale images.
        img = np.asarray(Image.open(path).convert('L'), dtype='float32')
        # dsift returns (frames, descriptors); descriptors has shape (F, 128),
        # one 128-D SIFT vector per sampled keypoint. step=[3, 3] keeps the
        # sampling sparse enough to be fast while staying representative;
        # fast=True uses the ~20x-faster approximate SIFT.
        _, descriptors = dsift(img, step=[3, 3], fast=True)
        features.append(descriptors)
    # Stack the per-image (F, 128) descriptor matrices into one (M, 128)
    # matrix, where M is the total number of sampled descriptors.
    features = np.concatenate(features, axis=0).astype('float32')
    print("Compute vocab...")
    start_time = time()
    # Cluster the M descriptors into vocab_size groups; kmeans returns the
    # (vocab_size, 128) centroid matrix, which is the vocabulary.
    vocab = kmeans(features, vocab_size, initialization="PLUSPLUS")
    end_time = time()
    print(f"It takes {((end_time - start_time)/60):.2f} minutes to compute vocab.")
    return vocab