-
Notifications
You must be signed in to change notification settings - Fork 151
/
run_experiment.py
67 lines (51 loc) · 2.17 KB
/
run_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import time
import numpy
from nearpy import Engine
from nearpy.hashes import RandomDiscretizedProjections, UniBucket
from nearpy.filters import NearestFilter, UniqueFilter
from nearpy.distances import EuclideanDistance
from nearpy.experiments import DistanceRatioExperiment, RecallPrecisionExperiment
# Set dimension and vector count for this experiment
dimension = 100
vector_count = 100000

# Create data set from two clusters. Each cluster is a random center plus
# small (0.01-scaled) gaussian noise, so points within a cluster are far
# closer to each other than to any point of the other cluster.
vectors = []

# Floor division keeps the sizes integral on both Python 2 and 3; the second
# cluster takes the remainder so an odd vector_count does not lose a vector.
first_cluster_size = vector_count // 2
cluster_sizes = (first_cluster_size, vector_count - first_cluster_size)

for cluster_size in cluster_sizes:
    center = numpy.random.randn(dimension)
    # One vectorised draw per cluster; each row is one data point.
    cluster = center + 0.01 * numpy.random.randn(cluster_size, dimension)
    # Iterating a 2-d array yields its rows, so `vectors` stays a flat list
    # of 1-d arrays of length `dimension`.
    vectors.extend(cluster)
# We are looking for the N closest neighbours
N = 20
nearest = NearestFilter(N)

# We will fill this list with all the engines we want to test
engines = []

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('Creating engines...')

# We are going to test these bin widths
bin_widths = [0.01 * x for x in range(1, 5)]

# Create one engine per bin width
for bin_width in bin_widths:
    # Four hashes named 'rdp1'..'rdp4', each constructed with a projection
    # count of 4 and the bin width under test.
    lshashes = [
        RandomDiscretizedProjections('rdp%d' % hash_number, 4, bin_width)
        for hash_number in range(1, 5)
    ]

    # Create engine with this configuration; candidates from all hashes are
    # reduced to the N nearest by the shared filter.
    engine = Engine(dimension, lshashes=lshashes, vector_filters=[nearest])

    # Add engine to list of engines to evaluate
    engines.append(engine)
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('Creating experiment and performing exact search...')

# Create experiment (looking for the N closest neighbours).
# The constructor performs exact search for evaluation,
# so the data set should not be too large for experiments.
exp = DistanceRatioExperiment(N, vectors, coverage_ratio=0.01)

print('Performing experiment for all engines...')

# Perform experiment for all engines
result = exp.perform_experiment(engines)