Skip to content
This repository was archived by the owner on Jan 5, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from geocoder_proxy import (GeocoderProxy, FirstAvailableGeocoder,
CheapestGeocoder)
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from types import MAPBOX

CACHEABLE_GEOCODERS = [MAPBOX]

SELECT_CACHE = ("select response from geocode_cache_{provider} "
"where id = '{id}'")
INSERT_CACHE = ("insert into geocode_cache_{provider} "
"(id, searchtext, city, state_province, country, response) "
"values (%s, %s, %s, %s, %s, %s) ")


def is_geocoder_cacheable(geocoder_name):
return geocoder_name in CACHEABLE_GEOCODERS


class CacheableGeocoder:
def __init__(self, geocoder, dbconn, use_cache=True, save_cache=True):
self._geocoder = geocoder
self._dbconn = dbconn
self._use_cache = use_cache
self._save_cache = save_cache

def geocode(self, searchtext, city=None, state_province=None,
country=None):
response = None
if self._use_cache and is_geocoder_cacheable(self._geocoder.name):
response = self._find_in_cache(searchtext, city, state_province,
country)

if not response:
response = self._lookup_geocoder(searchtext, city, state_province,
country)

return self._geocoder.format_response(response)

def _find_in_cache(self, searchtext, city=None, state_province=None,
country=None):
normalized_address = self._normalize_address(searchtext, city,
state_province, country)
with self._dbconn.cursor() as cursor:
cursor.execute(SELECT_CACHE.format(provider=self._geocoder.name,
id=normalized_address))
data = cursor.fetchone()
return data[0] if data else None

return None

def _save_to_cache(self, response, searchtext, city=None,
state_province=None, country=None):
normalized_address = self._normalize_address(searchtext, city,
state_province, country)
response = response.encode('utf-8') # https://github.com/requests/requests/issues/1604

with self._dbconn.cursor() as cursor:
cursor.execute(INSERT_CACHE.format(provider=self._geocoder.name),
(normalized_address, searchtext, city,
state_province, country, response))
self._dbconn.commit()

def _lookup_geocoder(self, searchtext, city=None, state_province=None,
country=None):
response = self._geocoder.geocode(searchtext, city, state_province,
country)

if self._save_cache and is_geocoder_cacheable(self._geocoder.name):
self._save_to_cache(response, searchtext, city, state_province,
country)

return response

def _normalize_address(self, searchtext, city=None, state_province=None,
country=None):
if searchtext and searchtext.strip():
address = [searchtext]
if city:
address.append(city)
if state_province:
address.append(state_province)
if country:
address.append(country)

address = ', '.join(address)

return ' '.join(address.replace(',', ' ').split()).replace(' ', '_')

return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
'''
This is a shameless copy of the Mapbox geocoder.
We will need to refactor it to return the whole response
and then delete this file.
'''
import json
import requests
from mapbox import Geocoder
from cartodb_services.metrics import Traceable
from cartodb_services.tools.exceptions import ServiceException
from cartodb_services.tools.qps import qps_retry
from types import MAPBOX

GEOCODER_NAME = 'geocoder_name'
EPHEMERAL_GEOCODER = 'mapbox.places'
PERMANENT_GEOCODER = 'mapbox.places-permanent'
DEFAULT_GEOCODER = PERMANENT_GEOCODER

ENTRY_FEATURES = 'features'
ENTRY_CENTER = 'center'
ENTRY_GEOMETRY = 'geometry'
ENTRY_COORDINATES = 'coordinates'
ENTRY_TYPE = 'type'
TYPE_POINT = 'Point'


class MapboxGeocoder(Traceable):
'''
Python wrapper for the Mapbox Geocoder service.
'''

def __init__(self, token, logger, service_params=None):
service_params = service_params or {}
self._token = token
self._logger = logger
self._geocoder_name = service_params.get(GEOCODER_NAME,
DEFAULT_GEOCODER)
self._geocoder = Geocoder(access_token=self._token,
name=self._geocoder_name)

@property
def name(self):
return MAPBOX

def _extract_lng_lat_from_feature(self, feature):
geometry = feature[ENTRY_GEOMETRY]
if geometry[ENTRY_TYPE] == TYPE_POINT:
location = geometry[ENTRY_COORDINATES]
else:
location = feature[ENTRY_CENTER]

longitude = location[0]
latitude = location[1]
return [longitude, latitude]

def format_response(self, response):
json_response = json.loads(response)

if json_response and json_response[ENTRY_FEATURES]:
feature = json_response[ENTRY_FEATURES][0]

return self._extract_lng_lat_from_feature(feature)
else:
return []

@qps_retry(qps=10)
def geocode(self, searchtext, city=None, state_province=None,
country=None):
if searchtext and searchtext.strip():
address = [searchtext]
if city:
address.append(city)
if state_province:
address.append(state_province)
else:
return []

country = [country] if country else None

try:
response = self._geocoder.forward(address=', '.join(address).decode('utf-8'),
country=country,
limit=1)

if response.status_code == requests.codes.ok:
return response.text
elif response.status_code == requests.codes.bad_request:
return None
else:
raise ServiceException(response.status_code, response)
except requests.Timeout as te:
# In case of timeout we want to stop the job because the server
# could be down
self._logger.error('Timeout connecting to Mapbox geocoding server',
te)
raise ServiceException('Error geocoding {0} using Mapbox'.format(
searchtext), None)
except requests.ConnectionError as ce:
# Don't raise the exception to continue with the geocoding job
self._logger.error('Error connecting to Mapbox geocoding server',
exception=ce)
return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from cartodb_services.metrics import Traceable
from cartodb_services.geocoder.cacheable_geocoder import is_geocoder_cacheable, CacheableGeocoder
from cartodb_services.geocoder.geocoder_mapbox import MapboxGeocoder
from types import MAPBOX, GOOGLE, HERE

MAPBOX_APIKEY = 'YOUR_MAPBOX_API_KEY' # We need to pass this as a parameter


class GeocoderProxy(Traceable):
def __init__(self, geocoderStrategy, dbconn, logger):
self._logger = logger
self._geocoder_name = geocoderStrategy.findGeocoder()
cacheable = is_geocoder_cacheable(self._geocoder_name)
geocoder = self._get_geocoder(self._geocoder_name)

self._cacheable_geocoder = CacheableGeocoder(geocoder, dbconn,
cacheable, cacheable)

def geocode(self, searchtext, city=None, state_province=None,
country=None):
return self._cacheable_geocoder.geocode(searchtext, city,
state_province, country)

def _get_geocoder(self, geocoder_name):
if geocoder_name == MAPBOX:
return MapboxGeocoder(MAPBOX_APIKEY, self._logger, None)

return None


class GeocoderStrategy():
def __init__(self, user_geocoders):
self._user_geocoders = user_geocoders

def findGeocoder(self):
raise NotImplementedError


class FirstAvailableGeocoder(GeocoderStrategy):
available_geocoders = [MAPBOX, GOOGLE, HERE]

def findGeocoder(self):
user_geocoders = [gc for gc in self._user_geocoders if gc in self.available_geocoders]
return user_geocoders[0] if user_geocoders else None


class CheapestGeocoder(GeocoderStrategy):
ordered_by_price = [MAPBOX, GOOGLE, HERE]

def findGeocoder(self):
user_geocoders_ordered = [gc for gc in self.ordered_by_price if gc in self._user_geocoders]
return user_geocoders_ordered[0] if user_geocoders_ordered else None
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MAPBOX = 'mapbox'
GOOGLE = 'google'
HERE = 'here'
42 changes: 42 additions & 0 deletions server/lib/python/cartodb_services/test/test_geocoderproxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import unittest
from mock import Mock
from cartodb_services.geocoder import (GeocoderProxy, FirstAvailableGeocoder,
CheapestGeocoder)
from cartodb_services.geocoder.types import MAPBOX, GOOGLE, HERE


class GeocoderProxyTestCase(unittest.TestCase):
def setUp(self):
pass

def test_firstavailable(self):
available_geocoders = [MAPBOX]
geocoder_proxy = GeocoderProxy(FirstAvailableGeocoder(
available_geocoders), dbconn=Mock(), logger=Mock())
self.assertEqual(geocoder_proxy._geocoder_name, MAPBOX)

available_geocoders = [GOOGLE, MAPBOX, HERE]
geocoder_proxy = GeocoderProxy(FirstAvailableGeocoder(
available_geocoders), dbconn=Mock(), logger=Mock())
self.assertEqual(geocoder_proxy._geocoder_name, GOOGLE)

def test_cheapest(self):
available_geocoders = [MAPBOX]
geocoder_proxy = GeocoderProxy(CheapestGeocoder(
available_geocoders), dbconn=Mock(), logger=Mock())
self.assertEqual(geocoder_proxy._geocoder_name, MAPBOX)

available_geocoders = [GOOGLE, MAPBOX, HERE]
geocoder_proxy = GeocoderProxy(CheapestGeocoder(
available_geocoders), dbconn=Mock(), logger=Mock())
self.assertEqual(geocoder_proxy._geocoder_name, MAPBOX)

available_geocoders = [GOOGLE, HERE]
geocoder_proxy = GeocoderProxy(CheapestGeocoder(
available_geocoders), dbconn=Mock(), logger=Mock())
self.assertEqual(geocoder_proxy._geocoder_name, GOOGLE)

available_geocoders = ['non_existing1', 'non_existing2']
geocoder_proxy = GeocoderProxy(CheapestGeocoder(
available_geocoders), dbconn=Mock(), logger=Mock())
self.assertEqual(geocoder_proxy._geocoder_name, None)