diff --git a/.gitignore b/.gitignore index 856af55b..ca8835d9 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ venv-hooked-on-sources # PyCharm/JetBrains IDE files .idea +process_one-???????? +work-???????? diff --git a/.travis.yml b/.travis.yml index 23ac01ff..f149a313 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,33 +1,11 @@ -language: python sudo: required -dist: trusty -python: - - "3.4" -cache: - apt: true - pip: true -addons: - postgresql: "9.3" +services: + - docker before_install: - # Remove existing PostGIS to make room for our own. - - sudo apt-get remove -y postgis* -install: - # Install Machine globally via Chef recipe, to pick up complete dependencies. - - sudo chef/run.sh prereqs - - sudo chef/run.sh testing - # Determine GDAL library version and install a compatible python binding. - # http://gis.stackexchange.com/questions/28966/python-gdal-package-missing-header-file-when-installing-via-pip - - CPLUS_INCLUDE_PATH=/usr/include/gdal C_INCLUDE_PATH=/usr/include/gdal pip install "GDAL==`gdal-config --version`" - # cairocffi is a drop-in replacement for Pycairo, which is absent from pip. - # http://stackoverflow.com/questions/11491268/install-pycairo-in-virtualenv - # https://pythonhosted.org/cairocffi/ - - pip install cairocffi - # Install Machine for virtualenv, to test with this Python version. - - pip install -U . - # Create necessary database tables. - - openaddr-ci-recreate-db - # Destroy the evidence, so that tests use a setup-installed version of openaddr. - - find openaddr -type f -a ! -name VERSION -delete -env: BOTO_CONFIG=/tmp/nowhere DATABASE_URL=postgres://openaddr:openaddr@localhost/openaddr -script: python setup.py test - + - docker pull openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x || true + - docker build -f Dockerfile-prereqs -t openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x . + - docker build -f Dockerfile-machine -t openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x . 
+script: + # Postgres needs a little time + - docker-compose up -d && sleep 15 + - docker-compose run machine python3 /usr/local/src/openaddr/test.py diff --git a/Dockerfile-machine b/Dockerfile-machine new file mode 100644 index 00000000..bee4d434 --- /dev/null +++ b/Dockerfile-machine @@ -0,0 +1,6 @@ +FROM openaddr/prereqs:4.x + +# From chef/openaddr/recipes/default.rb +COPY . /usr/local/src/openaddr +RUN cd /usr/local/src/openaddr && \ + pip3 install -U . diff --git a/Dockerfile-prereqs b/Dockerfile-prereqs new file mode 100644 index 00000000..db8e5370 --- /dev/null +++ b/Dockerfile-prereqs @@ -0,0 +1,34 @@ +FROM ubuntu:14.04 + +RUN apt-get update -y && \ + apt-get install -y software-properties-common python-software-properties + +ENV LC_ALL=C.UTF-8 + +# From chef/prereqs/recipes/default.rb +RUN add-apt-repository -y ppa:openaddresses/gdal2 && \ + apt-get update -y && \ + apt-get install -y python3-pip + +# # Watch for compatibility between awscli, botocore, and boto3. +# RUN apt-get install -y libyaml-dev && \ +# pip3 install -U 'awscli == 1.11.50' 'botocore == 1.5.14' + +# From chef/openaddr-prereqs/recipes/default.rb +RUN apt-get install -y python3-cairo libgeos-c1v5=3.5.0-1~trusty1 \ + libgdal20=2.1.0+dfsg-1~trusty2 python3-gdal=2.1.0+dfsg-1~trusty2 \ + python3-pip python3-dev libpq-dev memcached libffi-dev \ + gdal-bin=2.1.0+dfsg-1~trusty2 libgdal-dev=2.1.0+dfsg-1~trusty2 + +# From chef/tippecanoe/recipes/default.rb +RUN apt-get install -y git build-essential libsqlite3-dev protobuf-compiler libprotobuf-dev && \ + git clone -b 1.15.1 https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe && \ + cd /tmp/tippecanoe && \ + make && \ + PREFIX=/usr/local make install && \ + rm -rf /tmp/tippecanoe + +# # From chef/openaddr/recipes/default.rb +# COPY . /usr/local/src/openaddr +# RUN cd /usr/local/src/openaddr && \ +# pip3 install -U . 
diff --git a/circle.yml b/circle.yml index 3127b853..7067930b 100644 --- a/circle.yml +++ b/circle.yml @@ -1,32 +1,28 @@ machine: - python: - version: 3.4.4 - environment: - BOTO_CONFIG: /tmp/nowhere - DATABASE_URL: postgres://openaddr:openaddr@localhost/openaddr + services: + - docker dependencies: pre: - # Remove existing PostGIS and Postgres to make room for our own. - - sudo apt-get autoremove -y postgis* postgresql* + - docker pull openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x || true override: - # Install Machine globally via Chef recipe, to pick up complete dependencies. - - sudo chef/run.sh prereqs - - sudo chef/run.sh testing - # Determine GDAL library version and install a compatible python binding. - # http://gis.stackexchange.com/questions/28966/python-gdal-package-missing-header-file-when-installing-via-pip - - CPLUS_INCLUDE_PATH=/usr/include/gdal C_INCLUDE_PATH=/usr/include/gdal pip install "GDAL==`gdal-config --version`" - # cairocffi is a drop-in replacement for Pycairo, which is absent from pip. - # http://stackoverflow.com/questions/11491268/install-pycairo-in-virtualenv - # https://pythonhosted.org/cairocffi/ - - pip install cairocffi - # Install Machine for virtualenv, to test with this Python version. - - pip install -U . - # Create necessary database tables. - - openaddr-ci-recreate-db - # Destroy the evidence, so that tests use a setup-installed version of openaddr. - - find openaddr -type f -a ! -name VERSION -delete + - docker build -f Dockerfile-prereqs -t openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x . + - docker build -f Dockerfile-machine -t openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x . 
test: override: - - python setup.py test + # Postgres needs a little time + - docker-compose up -d && sleep 15 + - docker-compose run machine python3 /usr/local/src/openaddr/test.py + +deployment: + hub: + branch: [master, migurski/docker-docker-docker] + commands: + - docker login -e $DOCKER_EMAIL -u $DOCKER_USER -p $DOCKER_PASS + - docker tag openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x openaddr/prereqs:`cat openaddr/VERSION` + - docker tag openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x openaddr/machine:`cat openaddr/VERSION` + - docker push openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x + - docker push openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x + - docker push openaddr/prereqs:`cat openaddr/VERSION` + - docker push openaddr/machine:`cat openaddr/VERSION` diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..ca97e256 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,13 @@ +machine: + image: openaddr/machine:4.x + environment: + - DATABASE_URL=postgresql://openaddr:openaddr@postgres/openaddr + links: + - postgres + volumes: + - .:/vol +postgres: + image: mdillon/postgis:9.3 + environment: + - POSTGRES_USER=openaddr + - POSTGRES_PASSWORD=openaddr diff --git a/docs/install.md b/docs/install.md index 57df7d0e..f561e99e 100644 --- a/docs/install.md +++ b/docs/install.md @@ -3,52 +3,73 @@ Install This document describes how to install the Machine code for local development, and demonstrates two ways to use it: running a single source and running a complete batch set. If you’re editing a lot of sources and want to do it quickly without waiting for a remote Github-based continuous integration service, you may want to use run single sources locally. If you're working on the queuing and job control portions of Machine code, you may want to run complete batch sets on test data. 
-Local Development ------------------ - -You can edit a local copy of OpenAddresses code with working tests by installing -everything onto a local virtual machine using [VirtualBox](https://www.virtualbox.org) -and [Vagrant](https://www.vagrantup.com). This process should take about 10-20 -minutes depending on download speed. - -1. Download and install [VirtualBox](https://www.virtualbox.org) and [Vagrant](https://www.vagrantup.com) on your development machine. Both are available as separate installs, or as [part of Homebrew](https://brew.sh). - - Ensure that `VBoxManage` is in your path. If you download [VirtualBox from the website](https://www.virtualbox.org/wiki/Downloads), `VBoxManage` may be located in `/Applications/VirtualBox.app/Contents/MacOS` and you will need to [add it to your shell path](https://kb.iu.edu/d/acar). +Running A Source Locally +------------------------ -2. Clone [OpenAddresses Machine code](https://github.com/openaddresses/machine) from Github. +Run a single source without installing Python or other packages locally +using [OpenAddresses from Docker Hub](https://hub.docker.com/r/openaddr/). -3. From inside the machine folder, prepare the VirtualBox virtual machine with this command: +1. Get the latest OpenAddresses image from Docker Hub: + + docker pull openaddr/machine:latest - vagrant up +2. Download a source from [OpenAddresses/openaddresses on Github](https://github.com/openaddresses/openaddresses). [Berkeley, California](https://results.openaddresses.io/sources/us/ca/berkeley) is a small, reliable source that’s good to test with: - You’ll see a few notices scroll by to know that this process is working: + curl -o us-ca-berkeley.json \ + -L https://github.com/openaddresses/openaddresses/raw/master/sources/us/ca/berkeley.json - ==> default: Importing base box 'ubuntu/trusty64'... - ==> default: Setting the name of the VM: OpenAddresses-Machine_default_1487786156783_59682 - ==> default: Waiting for machine to boot.
This may take a few minutes... - ==> default: Machine booted and ready! +3. Using Docker, run `openaddr-process-one` to process the source: - This last part can take ~5 minutes: + docker run --volume `pwd`:/vol openaddr/machine \ + openaddr-process-one -v vol/us-ca-berkeley.json vol - ==> default: Mounting shared folders... - default: /home/vagrant/machine => /Users/jrandom/Sites/OpenAddresses-Machine - ==> default: Running provisioner: shell... - default: Running: inline script +4. Look in the directory `us-ca-berkeley` for address output, logs, and other files. -4. Connect to the virtual machine with this command: - - vagrant ssh - -5. Run the complete test suite to verify that it works: - - cd machine - python3 test.py +Local Development +----------------- -You should now be able to make changes and test them. The virtual machine’s -`/home/vagrant/machine` directory is a mount of your host machine’s current directory, so you -will be able to edit files in your normal text editor. Be sure to use `pip3` and -`python3` when running, or [set up an optional quick local virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) -with Python 3 and the [`--editable` flag](https://pip.pypa.io/en/stable/reference/pip_install/#install-editable). +You can edit a local copy of OpenAddresses code with working tests by installing +everything onto a local virtual machine using [Docker](https://www.docker.com). +This process should take 5-10 minutes depending on download speed. + +1. Download and install [Docker](https://www.docker.com). On Mac OS X, + use [Docker for Mac](https://docs.docker.com/docker-for-mac/). On Ubuntu, + run `apt-get install docker.io` or follow [Docker’s own directions](https://docs.docker.com/engine/installation/linux/ubuntu/). + +2. Build the required images, which include binary packages like GDAL and Postgres. + + docker-compose build + +3.
Run everything in detached mode: + + docker-compose up -d + + Run `docker ps -a` to see output like this: + + IMAGE STATUS NAMES + ... openaddr/machine:latest ... Exited (0) 44 seconds ago ... openaddressesmachine_machine_1 + mdillon/postgis:9.3 Up 45 seconds openaddressesmachine_postgres_1 + +4. Connect to the OpenAddresses image `openaddr/machine` with a bash shell + and the current working directory mapped to `/vol`: + + docker-compose run machine bash + +5. Build the OpenAddresses packages using + [virtualenv](https://packaging.python.org/installing/#creating-virtual-environments) + and [pip](https://packaging.python.org/installing/#use-pip-for-installing). + The `-e` flag to `pip install` ensures that your local copy of OpenAddresses + is used, so that you can test changes to the code made in your own editor: + + pip3 install virtualenv + virtualenv -p python3 --system-site-packages venv + source venv/bin/activate + pip3 install -e file:///vol + +You should now be able to make changes and test them. +If you exit the Docker container, changes made in step 5 above will be lost. +Use [Docker commit](https://docs.docker.com/engine/reference/commandline/commit/) +or similar if you need to save them. Running A First Source ---------------------- @@ -57,7 +78,8 @@ You can process a single individual source of OpenAddresses data with the comman 1. Download a source from [OpenAdresses/openaddresses on Github](https://github.com/openaddresses/openaddresses). [Berkeley, California](https://results.openaddresses.io/sources/us/ca/berkeley) is a small, reliable source that’s good to test with: - curl -L https://github.com/openaddresses/openaddresses/raw/master/sources/us/ca/berkeley.json -o us-ca-berkeley.json + curl -o us-ca-berkeley.json \ + -L https://github.com/openaddresses/openaddresses/raw/master/sources/us/ca/berkeley.json 2.
Run `openaddr-process-one` to process the source: diff --git a/openaddr/ci/schema.pgsql b/openaddr/ci/schema.pgsql index bf2c8831..82d4730a 100644 --- a/openaddr/ci/schema.pgsql +++ b/openaddr/ci/schema.pgsql @@ -99,7 +99,7 @@ CREATE VIEW dashboard_runs AS SELECT round(extract(epoch from datetime_start)::numeric, 3)::text AS tsname FROM sets; -GRANT SELECT ON dashboard_runs TO dashboard; +--GRANT SELECT ON dashboard_runs TO dashboard; CREATE VIEW dashboard_stats AS SELECT round(extract(epoch from s.datetime_start)::numeric, 3)::text AS tsname, @@ -120,4 +120,4 @@ CREATE VIEW dashboard_stats AS AND s.datetime_end IS NOT NULL AND r.state::text != 'null'; -GRANT SELECT ON dashboard_stats TO dashboard; +--GRANT SELECT ON dashboard_stats TO dashboard; diff --git a/openaddr/ci/worker.py b/openaddr/ci/worker.py index 36749b25..4d72f4d2 100755 --- a/openaddr/ci/worker.py +++ b/openaddr/ci/worker.py @@ -15,7 +15,7 @@ from . import ( db_connect, db_queue, db_queue, pop_task_from_taskqueue, - DONE_QUEUE, TASK_QUEUE, DUE_QUEUE, setup_logger, HEARTBEAT_QUEUE, + DONE_QUEUE, TASK_QUEUE_2, DUE_QUEUE, setup_logger, HEARTBEAT_QUEUE, log_function_errors ) @@ -58,7 +58,7 @@ def main(): try: with db_connect(args.database_url) as conn: - task_Q = db_queue(conn, TASK_QUEUE) + task_Q = db_queue(conn, TASK_QUEUE_2) done_Q = db_queue(conn, DONE_QUEUE) due_Q = db_queue(conn, DUE_QUEUE) beat_Q = db_queue(conn, HEARTBEAT_QUEUE) diff --git a/openaddr/render.py b/openaddr/render.py index ddacfb60..957c2625 100644 --- a/openaddr/render.py +++ b/openaddr/render.py @@ -105,7 +105,7 @@ def iterate_sources_dir(sources_dir): _, ext = splitext(filename.lower()) if ext == '.json': path = relpath(join(dirname, filename), sources_dir) - yield normalize('NFC', path) + yield path # yield normalize('NFC', path) def load_fake_state(sources_dir): ''' diff --git a/openaddr/tests/__init__.py b/openaddr/tests/__init__.py index 36b08297..fb43aa58 100644 --- a/openaddr/tests/__init__.py +++ 
b/openaddr/tests/__init__.py @@ -34,6 +34,7 @@ from os.path import dirname, join, basename, exists, splitext from contextlib import contextmanager from subprocess import Popen, PIPE +from unicodedata import normalize from threading import Lock if sys.platform != 'win32': @@ -987,19 +988,15 @@ def test_single_fr_paris(self): def test_single_fr_lareunion(self): ''' Test complete process_one.process on data that uses non-UTF8 encoding (issue #136) ''' - # Common encoding of la-réunion uses U+00E9: - # http://www.fileformat.info/info/unicode/char/e9/index.htm - filename_00E9 = b'fr/la-r\xc3\xa9union.json'.decode('utf8') - - # Less-common encoding of la-réunion uses combining character U+0301: - # http://www.fileformat.info/info/unicode/char/0301/index.htm - filename_0301 = b'fr/la-re\xcc\x81union.json'.decode('utf8') - - if os.path.exists(join(self.src_dir, filename_00E9)): - source = join(self.src_dir, filename_00E9) - elif os.path.exists(join(self.src_dir, filename_0301)): - source = join(self.src_dir, filename_0301) - else: + source = None + + for form in ('NFC', 'NFD'): + normalized = normalize(form, u'fr/la-réunion.json') + if os.path.exists(join(self.src_dir, normalized)): + source = join(self.src_dir, normalized) + break + + if source is None: raise Exception('Could not find a usable fr/la-réunion.json') with HTTMock(self.response_content):