diff --git a/server/lib/python/cartodb_services/cartodb_services/geocoder/__init__.py b/server/lib/python/cartodb_services/cartodb_services/geocoder/__init__.py new file mode 100644 index 00000000..f0625339 --- /dev/null +++ b/server/lib/python/cartodb_services/cartodb_services/geocoder/__init__.py @@ -0,0 +1,2 @@ +from geocoder_proxy import (GeocoderProxy, FirstAvailableGeocoder, + CheapestGeocoder) diff --git a/server/lib/python/cartodb_services/cartodb_services/geocoder/cacheable_geocoder.py b/server/lib/python/cartodb_services/cartodb_services/geocoder/cacheable_geocoder.py new file mode 100644 index 00000000..7485814c --- /dev/null +++ b/server/lib/python/cartodb_services/cartodb_services/geocoder/cacheable_geocoder.py @@ -0,0 +1,86 @@ +from types import MAPBOX + +CACHEABLE_GEOCODERS = [MAPBOX] + +SELECT_CACHE = ("select response from geocode_cache_{provider} " + "where id = '{id}'") +INSERT_CACHE = ("insert into geocode_cache_{provider} " + "(id, searchtext, city, state_province, country, response) " + "values (%s, %s, %s, %s, %s, %s) ") + + +def is_geocoder_cacheable(geocoder_name): + return geocoder_name in CACHEABLE_GEOCODERS + + +class CacheableGeocoder: + def __init__(self, geocoder, dbconn, use_cache=True, save_cache=True): + self._geocoder = geocoder + self._dbconn = dbconn + self._use_cache = use_cache + self._save_cache = save_cache + + def geocode(self, searchtext, city=None, state_province=None, + country=None): + response = None + if self._use_cache and is_geocoder_cacheable(self._geocoder.name): + response = self._find_in_cache(searchtext, city, state_province, + country) + + if not response: + response = self._lookup_geocoder(searchtext, city, state_province, + country) + + return self._geocoder.format_response(response) + + def _find_in_cache(self, searchtext, city=None, state_province=None, + country=None): + normalized_address = self._normalize_address(searchtext, city, + state_province, country) + with self._dbconn.cursor() as cursor: + cursor.execute(SELECT_CACHE.format(provider=self._geocoder.name, + id=normalized_address)) + data = cursor.fetchone() + return data[0] if data else None + + return None + + def _save_to_cache(self, response, searchtext, city=None, + state_province=None, country=None): + normalized_address = self._normalize_address(searchtext, city, + state_province, country) + response = response.encode('utf-8') # https://github.com/requests/requests/issues/1604 + + with self._dbconn.cursor() as cursor: + cursor.execute(INSERT_CACHE.format(provider=self._geocoder.name), + (normalized_address, searchtext, city, + state_province, country, response)) + self._dbconn.commit() + + def _lookup_geocoder(self, searchtext, city=None, state_province=None, + country=None): + response = self._geocoder.geocode(searchtext, city, state_province, + country) + + if self._save_cache and is_geocoder_cacheable(self._geocoder.name): + self._save_to_cache(response, searchtext, city, state_province, + country) + + return response + + def _normalize_address(self, searchtext, city=None, state_province=None, + country=None): + if searchtext and searchtext.strip(): + address = [searchtext] + if city: + address.append(city) + if state_province: + address.append(state_province) + if country: + address.append(country) + + address = ', '.join(address) + + return ' '.join(address.replace(',', ' ').split()).replace(' ', '_') + + return None diff --git a/server/lib/python/cartodb_services/cartodb_services/geocoder/geocoder_mapbox.py b/server/lib/python/cartodb_services/cartodb_services/geocoder/geocoder_mapbox.py new file mode 100644 index 00000000..3117e584 --- /dev/null +++ b/server/lib/python/cartodb_services/cartodb_services/geocoder/geocoder_mapbox.py @@ -0,0 +1,102 @@ +''' +This is a shameless copy of the Mapbox geocoder. +We will need to refactor it to return the whole response +and then delete this file. +''' +import json +import requests +from mapbox import Geocoder +from cartodb_services.metrics import Traceable +from cartodb_services.tools.exceptions import ServiceException +from cartodb_services.tools.qps import qps_retry +from types import MAPBOX + +GEOCODER_NAME = 'geocoder_name' +EPHEMERAL_GEOCODER = 'mapbox.places' +PERMANENT_GEOCODER = 'mapbox.places-permanent' +DEFAULT_GEOCODER = PERMANENT_GEOCODER + +ENTRY_FEATURES = 'features' +ENTRY_CENTER = 'center' +ENTRY_GEOMETRY = 'geometry' +ENTRY_COORDINATES = 'coordinates' +ENTRY_TYPE = 'type' +TYPE_POINT = 'Point' + + +class MapboxGeocoder(Traceable): + ''' + Python wrapper for the Mapbox Geocoder service. + ''' + + def __init__(self, token, logger, service_params=None): + service_params = service_params or {} + self._token = token + self._logger = logger + self._geocoder_name = service_params.get(GEOCODER_NAME, + DEFAULT_GEOCODER) + self._geocoder = Geocoder(access_token=self._token, + name=self._geocoder_name) + + @property + def name(self): + return MAPBOX + + def _extract_lng_lat_from_feature(self, feature): + geometry = feature[ENTRY_GEOMETRY] + if geometry[ENTRY_TYPE] == TYPE_POINT: + location = geometry[ENTRY_COORDINATES] + else: + location = feature[ENTRY_CENTER] + + longitude = location[0] + latitude = location[1] + return [longitude, latitude] + + def format_response(self, response): + json_response = json.loads(response) + + if json_response and json_response[ENTRY_FEATURES]: + feature = json_response[ENTRY_FEATURES][0] + + return self._extract_lng_lat_from_feature(feature) + else: + return [] + + @qps_retry(qps=10) + def geocode(self, searchtext, city=None, state_province=None, + country=None): + if searchtext and searchtext.strip(): + address = [searchtext] + if city: + address.append(city) + if state_province: + address.append(state_province) + else: + return [] + + country = [country] if country else None + + try: + response = self._geocoder.forward(address=', '.join(address).decode('utf-8'), + country=country, + limit=1) + + if response.status_code == requests.codes.ok: + return response.text + elif response.status_code == requests.codes.bad_request: + return None + else: + raise ServiceException(response.status_code, response) + except requests.Timeout as te: + # In case of timeout we want to stop the job because the server + # could be down + self._logger.error('Timeout connecting to Mapbox geocoding server', + te) + raise ServiceException('Error geocoding {0} using Mapbox'.format( + searchtext), None) + except requests.ConnectionError as ce: + # Don't raise the exception to continue with the geocoding job + self._logger.error('Error connecting to Mapbox geocoding server', + exception=ce) + return None diff --git a/server/lib/python/cartodb_services/cartodb_services/geocoder/geocoder_proxy.py b/server/lib/python/cartodb_services/cartodb_services/geocoder/geocoder_proxy.py new file mode 100644 index 00000000..1adfb0ff --- /dev/null +++ b/server/lib/python/cartodb_services/cartodb_services/geocoder/geocoder_proxy.py @@ -0,0 +1,52 @@ +from cartodb_services.metrics import Traceable +from cartodb_services.geocoder.cacheable_geocoder import is_geocoder_cacheable, CacheableGeocoder +from cartodb_services.geocoder.geocoder_mapbox import MapboxGeocoder +from types import MAPBOX, GOOGLE, HERE + +MAPBOX_APIKEY = 'YOUR_MAPBOX_API_KEY' # We need to pass this as a parameter + + +class GeocoderProxy(Traceable): + def __init__(self, geocoderStrategy, dbconn, logger): + self._logger = logger + self._geocoder_name = geocoderStrategy.findGeocoder() + cacheable = is_geocoder_cacheable(self._geocoder_name) + geocoder = self._get_geocoder(self._geocoder_name) + + self._cacheable_geocoder = CacheableGeocoder(geocoder, dbconn, + cacheable, cacheable) + + def geocode(self, searchtext, city=None, state_province=None, + country=None): + return self._cacheable_geocoder.geocode(searchtext, city, + state_province, country) + + def _get_geocoder(self, geocoder_name): + if geocoder_name == MAPBOX: + return MapboxGeocoder(MAPBOX_APIKEY, self._logger, None) + + return None + + +class GeocoderStrategy(): + def __init__(self, user_geocoders): + self._user_geocoders = user_geocoders + + def findGeocoder(self): + raise NotImplementedError + + +class FirstAvailableGeocoder(GeocoderStrategy): + available_geocoders = [MAPBOX, GOOGLE, HERE] + + def findGeocoder(self): + user_geocoders = [gc for gc in self._user_geocoders if gc in self.available_geocoders] + return user_geocoders[0] if user_geocoders else None + + +class CheapestGeocoder(GeocoderStrategy): + ordered_by_price = [MAPBOX, GOOGLE, HERE] + + def findGeocoder(self): + user_geocoders_ordered = [gc for gc in self.ordered_by_price if gc in self._user_geocoders] + return user_geocoders_ordered[0] if user_geocoders_ordered else None diff --git a/server/lib/python/cartodb_services/cartodb_services/geocoder/types.py b/server/lib/python/cartodb_services/cartodb_services/geocoder/types.py new file mode 100644 index 00000000..769c5b55 --- /dev/null +++ b/server/lib/python/cartodb_services/cartodb_services/geocoder/types.py @@ -0,0 +1,3 @@ +MAPBOX = 'mapbox' +GOOGLE = 'google' +HERE = 'here' diff --git a/server/lib/python/cartodb_services/test/test_geocoderproxy.py b/server/lib/python/cartodb_services/test/test_geocoderproxy.py new file mode 100644 index 00000000..dbd8cb39 --- /dev/null +++ b/server/lib/python/cartodb_services/test/test_geocoderproxy.py @@ -0,0 +1,42 @@ +import unittest +from mock import Mock +from cartodb_services.geocoder import (GeocoderProxy, FirstAvailableGeocoder, + CheapestGeocoder) +from cartodb_services.geocoder.types import MAPBOX, GOOGLE, HERE + + +class GeocoderProxyTestCase(unittest.TestCase): + def setUp(self): + pass + + def test_firstavailable(self): + available_geocoders = [MAPBOX] + geocoder_proxy = GeocoderProxy(FirstAvailableGeocoder( + available_geocoders), dbconn=Mock(), logger=Mock()) + self.assertEqual(geocoder_proxy._geocoder_name, MAPBOX) + + available_geocoders = [GOOGLE, MAPBOX, HERE] + geocoder_proxy = GeocoderProxy(FirstAvailableGeocoder( + available_geocoders), dbconn=Mock(), logger=Mock()) + self.assertEqual(geocoder_proxy._geocoder_name, GOOGLE) + + def test_cheapest(self): + available_geocoders = [MAPBOX] + geocoder_proxy = GeocoderProxy(CheapestGeocoder( + available_geocoders), dbconn=Mock(), logger=Mock()) + self.assertEqual(geocoder_proxy._geocoder_name, MAPBOX) + + available_geocoders = [GOOGLE, MAPBOX, HERE] + geocoder_proxy = GeocoderProxy(CheapestGeocoder( + available_geocoders), dbconn=Mock(), logger=Mock()) + self.assertEqual(geocoder_proxy._geocoder_name, MAPBOX) + + available_geocoders = [GOOGLE, HERE] + geocoder_proxy = GeocoderProxy(CheapestGeocoder( + available_geocoders), dbconn=Mock(), logger=Mock()) + self.assertEqual(geocoder_proxy._geocoder_name, GOOGLE) + + available_geocoders = ['non_existing1', 'non_existing2'] + geocoder_proxy = GeocoderProxy(CheapestGeocoder( + available_geocoders), dbconn=Mock(), logger=Mock()) + self.assertEqual(geocoder_proxy._geocoder_name, None)