This repository has been archived by the owner on Aug 21, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
geocoder.js
156 lines (142 loc) · 5.39 KB
/
geocoder.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
var nodeGeocoder = require('node-geocoder');
var RateLimiter = require('limiter').RateLimiter;
var async = require('async');
module.exports = function(options) {
return new Geocoder(options);
};
/**
 * Geocoder: geocodes snippet addresses both on demand and in a recurring
 * background pass that is started as soon as the object is constructed.
 *
 * Options read by this constructor:
 *   - apos (required): object providing `getCache(name)` and the `pages`
 *     collection used to find and update snippets.
 *   - instance: value matched against the `type` property of pages when
 *     looking for snippets that still need geocoding.
 *   - dailyLimit: maximum geocoder requests per day (default 2500).
 *   - rateLimit: maximum geocoder requests per second, also used as the
 *     batch size of each background pass (default 10).
 *   - cacheLifetime: lifetime handed to the cache's `set` method
 *     (default 86400; presumably seconds, i.e. one day -- confirm against
 *     the cache implementation).
 * The whole options object is also passed through to node-geocoder.
 *
 * Throws a TypeError if `options.apos` is not provided.
 */
function Geocoder(options) {
  var self = this;

  // Normalize once so every option read below is safe, and fail with a clear
  // message when the one mandatory dependency is missing (previously this
  // surfaced as an opaque "cannot read property" TypeError).
  options = options || {};
  if (!options.apos) {
    throw new TypeError('geocoder: the "apos" option is required');
  }

  self._nodeGeocoder = nodeGeocoder(options);
  // Google's standard free limits
  self._dailyLimit = options.dailyLimit || 2500;
  self._rateLimit = options.rateLimit || 10;
  self._instance = options.instance;
  self._apos = options.apos;
  self.cacheLifetime = options.cacheLifetime || 86400;
  self.cache = self._apos.getCache('apostrophe-map-geocoder');

  var dayLimiter = new RateLimiter(self._dailyLimit, 'day');
  var secondLimiter = new RateLimiter(self._rateLimit, 'second');

  // A manually entered coordinate is only usable if it is a real, finite
  // number; NaN and +/-Infinity have typeof 'number' but would produce an
  // invalid GeoJSON point.
  function isCoordinate(value) {
    return (typeof value === 'number') && isFinite(value);
  }

  // Strategy: look for ungeocoded addresses, pull at most `rateLimit` of them
  // per pass, and use RateLimiter to ensure we don't go faster than the daily
  // and per-second rate limits of Google's API permit. When a pass finishes,
  // the next one is scheduled 10-15 seconds later.
  self.geocodePass = function() {
    // Make sure an address exists, otherwise the geocode module will complain in a way
    // that sticks us in a loop trying again with that bad location forever
    self._apos.pages.find(
      {
        type: self._instance,
        address: { $exists: true, $ne: '' },
        geoInvalidAddress: { $ne: true },
        $or: [{ geo: { $exists: false } }, { geo: null }]
      },
      { title: 1, address: 1 }
    ).limit(self._rateLimit).toArray(function(err, snippets) {
      if (err) {
        // Report the failure instead of hiding it; the pass is still
        // rescheduled below, so a transient database problem heals itself.
        console.error('geocoding pass error: ', err);
      }
      // Use eachSeries to avoid parallelism, the rate limiter below should in theory
      // make this not a problem but I've seen Google get grumpy.
      async.eachSeries(snippets || [], geocodeSnippet, function() {
        // Don't invoke passes so ferociously often, and
        // introduce randomness to deal more gracefully
        // with situations where many Apostrophe instances
        // are talking to MongoDB
        setTimeout(self.geocodePass, 10000 + Math.random() * 5000);
      });

      function geocodeSnippet(snippet, callback) {
        // Use rate limiter to avoid getting shut down by Google during large imports.
        // Once the per-day limit (2,500 by default) is used up, this simply
        // waits for the day limiter to release another token.
        dayLimiter.removeTokens(1, function() {
          secondLimiter.removeTokens(1, function() {
            return self.geocodeSnippet(snippet, true, callback);
          });
        });
      }
    });
  };

  // Geocode an address now. Callback receives an error if
  // any and a geoJSON point:
  //
  // { type: 'Point', coordinates: [ longitude, latitude ] }
  //
  // The point is null (with no error) when the geocoder found no match.
  // Checks the cache first; fresh results are stored in the cache with
  // `self.cacheLifetime` before being returned.
  self.geocode = function(address, callback) {
    var location;
    return self.cache.get(address, function(err, value) {
      if (err) {
        return callback(err);
      }
      if (value) {
        return callback(null, value);
      }
      return fetch();
    });

    // Ask the geocoding service; only the first match is used.
    function fetch() {
      return self._nodeGeocoder.geocode(address, function(err, geo) {
        if (err) {
          console.error('geocoding error: ', err);
          return callback(err);
        }
        if (!geo) {
          console.error('geocoding problem: invalid response');
          return callback(new Error('Invalid response'));
        }
        if (!geo.length) {
          // No location was found (?)
          return callback(null, null);
        }
        var googlePoint = geo[0];
        location = {
          type: 'Point',
          coordinates: [ googlePoint.longitude, googlePoint.latitude ]
        };
        return insert();
      });
    }

    // Remember the result so repeated lookups don't spend API quota.
    function insert() {
      return self.cache.set(address, location, self.cacheLifetime, function(err) {
        if (err) {
          return callback(err);
        }
        return callback(null, location);
      });
    }
  };

  // Available to be called individually, for instance for manual edits where
  // it is unlikely the rate limit will be reached.
  //
  // Sets `snippet.geo` (a geoJSON point, or null) and `snippet.geoInvalidAddress`
  // on the snippet object passed in. If `saveNow` is truthy, those two
  // properties are also written to the page with the snippet's `_id`.
  // Geocoding and save failures are deliberately not reported to the
  // callback: the snippet keeps `geo: null` and is picked up again by a
  // later background pass.
  self.geocodeSnippet = function(snippet, saveNow, callback) {
    snippet.geo = null;
    return async.series({
      geocode: function(callback) {
        // If a manually entered location is present, let it win. Non-finite
        // values (NaN, Infinity) are not a usable location, so in that case
        // fall through to geocoding the address.
        if (isCoordinate(snippet.lat) && isCoordinate(snippet.lng)) {
          snippet.geoInvalidAddress = false;
          snippet.geo = {
            type: 'Point',
            coordinates: [ snippet.lng, snippet.lat ]
          };
          return callback(null);
        }
        return self.geocode(snippet.address, function(err, geo) {
          if (err) {
            // Who knows? Usually rate limiting. Hard to tell with an API that makes it
            // hard to catch things with any nuance. Try again later
            snippet.geo = null;
            return callback(null);
          }
          if (!geo) {
            snippet.geoInvalidAddress = true;
          } else {
            snippet.geoInvalidAddress = false;
            snippet.geo = geo;
          }
          return callback(null);
        });
      },
      save: function(callback) {
        if (!saveNow) {
          return callback(null);
        }
        return self._apos.pages.update(
          { _id: snippet._id },
          { $set: { geo: snippet.geo, geoInvalidAddress: snippet.geoInvalidAddress } },
          function() {
            // If it didn't work, it'll come up in the next query,
            // no need to report the error now
            return callback(null);
          }
        );
      }
    }, callback);
  };

  // Kick off the first background pass; each pass schedules the next one.
  self.geocodePass();
}