From 9c5e2885cadd6f2900b3e442379db6729194613a Mon Sep 17 00:00:00 2001
From: nucccc
Date: Sun, 25 Aug 2024 13:11:32 +0200
Subject: [PATCH] introduced reverse geocoder

---
 geocode/geocode.py          |  5 +++++
 geocode/reverse_geocoder.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt            |  1 +
 3 files changed, 53 insertions(+)
 create mode 100644 geocode/reverse_geocoder.py

diff --git a/geocode/geocode.py b/geocode/geocode.py
index 718dd4d..58a0a62 100644
--- a/geocode/geocode.py
+++ b/geocode/geocode.py
@@ -14,6 +14,7 @@
 import hashlib
 import json
 from .flags import flags
+from .reverse_geocoder import ReverseGeocoder
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)-5.5s] [%(name)-12.12s]: %(message)s')
 
@@ -312,6 +313,10 @@ def get_arguments_hash(self):
         geocode_args = [str(self.min_population_cutoff), str(self.large_city_population_cutoff)] + self.location_types
         return hashlib.sha256(','.join(geocode_args).encode()).hexdigest()[:15]
 
+    def build_reverse_geocoder(self):
+        """Return a ReverseGeocoder built from this instance's ``geo_data``."""
+        return ReverseGeocoder(self.geo_data)
+
     # private
 
     def _get_location_types(self, location_types):
diff --git a/geocode/reverse_geocoder.py b/geocode/reverse_geocoder.py
new file mode 100644
index 0000000..c4b175d
--- /dev/null
+++ b/geocode/reverse_geocoder.py
@@ -0,0 +1,47 @@
+"""Nearest-neighbour reverse geocoding on top of a geo_data record list."""
+
+import numpy as np
+from sklearn.neighbors import KDTree
+
+
+class ReverseGeocoder:
+    """Look up the geo_data record closest to a longitude/latitude pair.
+
+    Index 3 of every record is used as its longitude and index 4 as its
+    latitude. Both are assumed to be decimal degrees.
+
+    Coordinates are indexed as 3-D unit vectors rather than raw degrees:
+    straight-line distance between unit vectors grows with great-circle
+    distance, so the nearest neighbour stays correct across the
+    antimeridian and at high latitudes.
+    """
+
+    def __init__(self, geo_data):
+        """Index ``geo_data`` for nearest-neighbour queries.
+
+        Raises:
+            ValueError: If ``geo_data`` contains no records.
+        """
+        self.geo_data = geo_data
+
+        coords = np.asarray([(d[3], d[4]) for d in geo_data], dtype=float)
+        if coords.size == 0:
+            raise ValueError('geo_data must contain at least one record')
+
+        self.tree = KDTree(self._to_unit_vectors(coords[:, 0], coords[:, 1]))
+
+    @staticmethod
+    def _to_unit_vectors(lon, lat):
+        """Convert degrees to an ``(n, 3)`` array of points on the unit sphere."""
+        lon = np.radians(np.asarray(lon, dtype=float))
+        lat = np.radians(np.asarray(lat, dtype=float))
+        cos_lat = np.cos(lat)
+        return np.column_stack(
+            (cos_lat * np.cos(lon), cos_lat * np.sin(lon), np.sin(lat))
+        )
+
+    def nearest_geodata(self, lon, lat):
+        """Return the ``geo_data`` record nearest to ``(lon, lat)`` in degrees."""
+        # query() returns (distances, indices), each shaped (n_queries, k).
+        _, indices = self.tree.query(self._to_unit_vectors(lon, lat))
+        return self.geo_data[indices[0][0]]
diff --git a/requirements.txt b/requirements.txt
index d2f72c3..b3f5ee9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ pandas>=1
 tqdm
 flashtext
 joblib
+scikit-learn