forked from tonytonyjan/jaro_winkler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rakefile
51 lines (46 loc) · 1.9 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
require "bundler/gem_tasks"
require "rake/extensiontask"
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
Rake::ExtensionTask.new("jaro_winkler") do |ext|
ext.lib_dir = "lib/jaro_winkler"
end
task default: [:compile, :spec]
desc 'type can be "native" or "pure"'
task :benchmark, :type do |t, args|
args.with_defaults(type: :all)
ROOT_PATH = File.expand_path('..', __FILE__)
LIB_PATH = File.join(ROOT_PATH, 'lib')
BENCHMARK_PATH = File.join(ROOT_PATH, 'benchmark')
files = File.join(BENCHMARK_PATH, args[:type] == :all ? '*.rb' : "#{args[:type]}.rb")
Dir[files].each do |path|
output_path = File.join(BENCHMARK_PATH, File.basename(path, '*.rb').sub('.rb', '.txt'))
cmd = "RUBYLIB=#{LIB_PATH} ruby #{path}"
puts cmd
output = `#{cmd}`
File.write(output_path, output)
end
end
task :compare do
require 'jaro_winkler'
require 'fuzzystringmatch'
require 'hotwater'
require 'amatch'
@ary = [['henka', 'henkan'], ['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten'], ['San Francisco', 'Santa Monica']]
table = []
table << %w[str_1 str_2 jaro_winkler fuzzystringmatch hotwater amatch]
table << %w[--- --- --- --- --- ---]
jarow = FuzzyStringMatch::JaroWinkler.create(:native)
@ary.each do |str_1, str_2|
table << ["\"#{str_1}\"", "\"#{str_2}\"", JaroWinkler.distance(str_1, str_2).round(4), jarow.getDistance(str_1, str_2).round(4), Hotwater.jaro_winkler_distance(str_1, str_2).round(4), Amatch::Jaro.new(str_1).match(str_2).round(4)]
end
col_len = []
table.first.length.times{ |i| col_len << table.map{ |row| row[i].to_s.length }.max }
table.first.each_with_index{ |title, i| "%-#{col_len[i]}s" % title }
table.each_with_index do |row|
row.each_with_index do |col, i|
row[i] = "%-#{col_len[i]}s" % col.to_s
end
end
table.each{|row| puts row.join(' | ')}
end