Skip to content
This repository was archived by the owner on May 5, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ venv-hooked-on-sources

# PyCharm/JetBrains IDE files
.idea
process_one-????????
work-????????
40 changes: 9 additions & 31 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,11 @@
language: python
sudo: required
dist: trusty
python:
- "3.4"
cache:
apt: true
pip: true
addons:
postgresql: "9.3"
services:
- docker
before_install:
# Remove existing PostGIS to make room for our own.
- sudo apt-get remove -y postgis*
install:
# Install Machine globally via Chef recipe, to pick up complete dependencies.
- sudo chef/run.sh prereqs
- sudo chef/run.sh testing
# Determine GDAL library version and install a compatible python binding.
# http://gis.stackexchange.com/questions/28966/python-gdal-package-missing-header-file-when-installing-via-pip
- CPLUS_INCLUDE_PATH=/usr/include/gdal C_INCLUDE_PATH=/usr/include/gdal pip install "GDAL==`gdal-config --version`"
# cairocffi is a drop-in replacement for Pycairo, which is absent from pip.
# http://stackoverflow.com/questions/11491268/install-pycairo-in-virtualenv
# https://pythonhosted.org/cairocffi/
- pip install cairocffi
# Install Machine for virtualenv, to test with this Python version.
- pip install -U .
# Create necessary database tables.
- openaddr-ci-recreate-db
# Destroy the evidence, so that tests use a setup-installed version of openaddr.
- find openaddr -type f -a ! -name VERSION -delete
env: BOTO_CONFIG=/tmp/nowhere DATABASE_URL=postgres://openaddr:openaddr@localhost/openaddr
script: python setup.py test

- docker pull openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x || true
- docker build -f Dockerfile-prereqs -t openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x .
- docker build -f Dockerfile-machine -t openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x .
script:
# Postgres needs a little time
- docker-compose up -d && sleep 15
- docker-compose run machine python3 /usr/local/src/openaddr/test.py
6 changes: 6 additions & 0 deletions Dockerfile-machine
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Application image: installs the OpenAddresses Machine source on top of the
# openaddr/prereqs base image.
# NOTE(review): the 4.x tag is hard-coded here, while the CI build commands
# derive the tag from the major version in openaddr/VERSION; confirm the two
# are kept in sync when the major version changes.
FROM openaddr/prereqs:4.x

# From chef/openaddr/recipes/default.rb
# Copy the whole build context into the image, then install it with pip3 from
# inside that directory.
COPY . /usr/local/src/openaddr
RUN cd /usr/local/src/openaddr && \
pip3 install -U .
34 changes: 34 additions & 0 deletions Dockerfile-prereqs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Base image with the binary prerequisites for OpenAddresses Machine:
# GDAL 2 from the openaddresses PPA, Python 3 tooling, and Tippecanoe.
FROM ubuntu:14.04

# software-properties-common supplies add-apt-repository, used below.
RUN apt-get update -y && \
    apt-get install -y software-properties-common python-software-properties

# NOTE(review): presumably set so Python 3 tools default to UTF-8; confirm.
ENV LC_ALL=C.UTF-8

# From chef/prereqs/recipes/default.rb
RUN add-apt-repository -y ppa:openaddresses/gdal2 && \
    apt-get update -y && \
    apt-get install -y python3-pip

# # Watch for compatibility between awscli, botocore, and boto3.
# RUN apt-get install -y libyaml-dev && \
# pip3 install -U 'awscli == 1.11.50' 'botocore == 1.5.14'

# From chef/openaddr-prereqs/recipes/default.rb
# apt-get update runs in the same layer as the install so that a cached,
# stale package index from an earlier layer is never used to resolve the
# pinned package versions.
RUN apt-get update -y && \
    apt-get install -y python3-cairo libgeos-c1v5=3.5.0-1~trusty1 \
    libgdal20=2.1.0+dfsg-1~trusty2 python3-gdal=2.1.0+dfsg-1~trusty2 \
    python3-pip python3-dev libpq-dev memcached libffi-dev \
    gdal-bin=2.1.0+dfsg-1~trusty2 libgdal-dev=2.1.0+dfsg-1~trusty2

# From chef/tippecanoe/recipes/default.rb
# Build Tippecanoe 1.15.1 from source and remove the checkout afterwards so
# it does not remain in this layer. As above, the package index is refreshed
# in the same layer as the install.
RUN apt-get update -y && \
    apt-get install -y git build-essential libsqlite3-dev protobuf-compiler libprotobuf-dev && \
    git clone -b 1.15.1 https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe && \
    cd /tmp/tippecanoe && \
    make && \
    PREFIX=/usr/local make install && \
    rm -rf /tmp/tippecanoe

# # From chef/openaddr/recipes/default.rb
# COPY . /usr/local/src/openaddr
# RUN cd /usr/local/src/openaddr && \
# pip3 install -U .
44 changes: 20 additions & 24 deletions circle.yml
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
machine:
python:
version: 3.4.4
environment:
BOTO_CONFIG: /tmp/nowhere
DATABASE_URL: postgres://openaddr:openaddr@localhost/openaddr
services:
- docker

dependencies:
pre:
# Remove existing PostGIS and Postgres to make room for our own.
- sudo apt-get autoremove -y postgis* postgresql*
- docker pull openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x || true
override:
# Install Machine globally via Chef recipe, to pick up complete dependencies.
- sudo chef/run.sh prereqs
- sudo chef/run.sh testing
# Determine GDAL library version and install a compatible python binding.
# http://gis.stackexchange.com/questions/28966/python-gdal-package-missing-header-file-when-installing-via-pip
- CPLUS_INCLUDE_PATH=/usr/include/gdal C_INCLUDE_PATH=/usr/include/gdal pip install "GDAL==`gdal-config --version`"
# cairocffi is a drop-in replacement for Pycairo, which is absent from pip.
# http://stackoverflow.com/questions/11491268/install-pycairo-in-virtualenv
# https://pythonhosted.org/cairocffi/
- pip install cairocffi
# Install Machine for virtualenv, to test with this Python version.
- pip install -U .
# Create necessary database tables.
- openaddr-ci-recreate-db
# Destroy the evidence, so that tests use a setup-installed version of openaddr.
- find openaddr -type f -a ! -name VERSION -delete
- docker build -f Dockerfile-prereqs -t openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x .
- docker build -f Dockerfile-machine -t openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x .

test:
override:
- python setup.py test
# Postgres needs a little time
- docker-compose up -d && sleep 15
- docker-compose run machine python3 /usr/local/src/openaddr/test.py

deployment:
hub:
branch: [master, migurski/docker-docker-docker]
commands:
- docker login -e $DOCKER_EMAIL -u $DOCKER_USER -p $DOCKER_PASS
- docker tag openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x openaddr/prereqs:`cat openaddr/VERSION`
- docker tag openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x openaddr/machine:`cat openaddr/VERSION`
- docker push openaddr/prereqs:`cut -f1 -d. openaddr/VERSION`.x
- docker push openaddr/machine:`cut -f1 -d. openaddr/VERSION`.x
- docker push openaddr/prereqs:`cat openaddr/VERSION`
- docker push openaddr/machine:`cat openaddr/VERSION`
13 changes: 13 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Compose services used for local development and CI test runs.

# OpenAddresses Machine container; reaches the database through the
# "postgres" link and sees the current directory mounted at /vol.
machine:
  image: openaddr/machine:4.x
  environment:
    DATABASE_URL: 'postgresql://openaddr:openaddr@postgres/openaddr'
  links:
    - postgres
  volumes:
    - '.:/vol'

# PostGIS-enabled Postgres 9.3; the credentials match DATABASE_URL above.
postgres:
  image: mdillon/postgis:9.3
  environment:
    POSTGRES_USER: openaddr
    POSTGRES_PASSWORD: openaddr
98 changes: 60 additions & 38 deletions docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,73 @@ Install

This document describes how to install the Machine code for local development, and demonstrates two ways to use it: running a single source and running a complete batch set. If you’re editing a lot of sources and want to do it quickly without waiting for a remote Github-based continuous integration service, you may want to run single sources locally. If you're working on the queuing and job control portions of Machine code, you may want to run complete batch sets on test data.

Local Development
-----------------

You can edit a local copy of OpenAddresses code with working tests by installing
everything onto a local virtual machine using [VirtualBox](https://www.virtualbox.org)
and [Vagrant](https://www.vagrantup.com). This process should take about 10-20
minutes depending on download speed.

1. Download and install [VirtualBox](https://www.virtualbox.org) and [Vagrant](https://www.vagrantup.com) on your development machine. Both are available as separate installs, or as [part of Homebrew](https://brew.sh).

Ensure that `VBoxManage` is in your path. If you download [VirtualBox from the website](https://www.virtualbox.org/wiki/Downloads), `VBoxManage` may be located in `/Applications/VirtualBox.app/Contents/MacOS` and you will need to [add it to your shell path](https://kb.iu.edu/d/acar).
Running A Source Locally
------------------------

2. Clone [OpenAddresses Machine code](https://github.com/openaddresses/machine) from Github.
Run a single source without installing Python or other packages locally
using [OpenAddresses from Docker Hub](https://hub.docker.com/r/openaddr/).

3. From inside the machine folder, prepare the VirtualBox virtual machine with this command:
1. Get the latest OpenAddresses image from Docker Hub:

docker pull openaddr/machine:latest

vagrant up
2. Download a source from [OpenAddresses/openaddresses on Github](https://github.com/openaddresses/openaddresses). [Berkeley, California](https://results.openaddresses.io/sources/us/ca/berkeley) is a small, reliable source that’s good to test with:

You’ll see a few notices scroll by to know that this process is working:
curl -o us-ca-berkeley.json \
-L https://github.com/openaddresses/openaddresses/raw/master/sources/us/ca/berkeley.json

==> default: Importing base box 'ubuntu/trusty64'...
==> default: Setting the name of the VM: OpenAddresses-Machine_default_1487786156783_59682
==> default: Waiting for machine to boot. This may take a few minutes...
==> default: Machine booted and ready!
3. Using Docker, run `openaddr-process-one` to process the source:

This last part can take ~5 minutes:
docker run --volume `pwd`:/vol openaddr/machine \
openaddr-process-one -v vol/us-ca-berkeley.json vol

==> default: Mounting shared folders...
default: /home/vagrant/machine => /Users/jrandom/Sites/OpenAddresses-Machine
==> default: Running provisioner: shell...
default: Running: inline script
4. Look in the directory `us-ca-berkeley` for address output, logs, and other files.

4. Connect to the virtual machine with this command:

vagrant ssh

5. Run the complete test suite to verify that it works:

cd machine
python3 test.py
Local Development
-----------------

You should now be able to make changes and test them. The virtual machine’s
`/home/vagrant/machine` directory is a mount of your host machine’s current directory, so you
will be able to edit files in your normal text editor. Be sure to use `pip3` and
`python3` when running, or [set up an optional quick local virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/)
with Python 3 and the [`--editable` flag](https://pip.pypa.io/en/stable/reference/pip_install/#install-editable).
You can edit a local copy of OpenAddresses code with working tests by installing
everything onto a local virtual machine using [Docker](https://www.docker.com).
This process should take 5-10 minutes depending on download speed.

1. Download and install [Docker](https://www.docker.com). On Mac OS X,
use [Docker for Mac](https://docs.docker.com/docker-for-mac/). On Ubuntu,
run `apt-get install docker.io` or follow [Docker’s own directions](https://docs.docker.com/engine/installation/linux/ubuntu/).

2. Build the required images, which include binary packages like GDAL and Postgres.

docker-compose build

3. Run everything in detached mode:

docker-compose up -d

Run `docker ps -a` to see output like this:

IMAGE STATUS NAMES
... openaddr/machine:latest ... Exited (0) 44 seconds ago ... openaddressesmachine_machine_1
mdillon/postgis:9.3 Up 45 seconds openaddressesmachine_postgres_1

4. Connect to the OpenAddresses image `openaddr/machine` with a bash shell
and the current working directory mapped to `/vol`:

docker-compose run machine bash

5. Build the OpenAddresses packages using
[virtualenv](https://packaging.python.org/installing/#creating-virtual-environments)
and [pip](https://packaging.python.org/installing/#use-pip-for-installing).
The `-e` flag to `pip install` ensures that your local copy of OpenAddresses
is used, so that you can test changes to the code made in your own editor:

pip3 install virtualenv
virtualenv -p python3 --system-site-packages venv
source venv/bin/activate
pip3 install -e file:///vol

You should now be able to make changes and test them.
If you exit the Docker container, changes made in step 5 above will be lost.
Use [Docker commit](https://docs.docker.com/engine/reference/commandline/commit/)
or similar if you need to save them.

Running A First Source
----------------------
Expand All @@ -57,7 +78,8 @@ You can process a single individual source of OpenAddresses data with the comman

1. Download a source from [OpenAddresses/openaddresses on Github](https://github.com/openaddresses/openaddresses). [Berkeley, California](https://results.openaddresses.io/sources/us/ca/berkeley) is a small, reliable source that’s good to test with:

curl -L https://github.com/openaddresses/openaddresses/raw/master/sources/us/ca/berkeley.json -o us-ca-berkeley.json
curl -o us-ca-berkeley.json \
-L https://github.com/openaddresses/openaddresses/raw/master/sources/us/ca/berkeley.json

2. Run `openaddr-process-one` to process the source:

Expand Down
4 changes: 2 additions & 2 deletions openaddr/ci/schema.pgsql
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ CREATE VIEW dashboard_runs AS
SELECT round(extract(epoch from datetime_start)::numeric, 3)::text AS tsname
FROM sets;

GRANT SELECT ON dashboard_runs TO dashboard;
--GRANT SELECT ON dashboard_runs TO dashboard;

CREATE VIEW dashboard_stats AS
SELECT round(extract(epoch from s.datetime_start)::numeric, 3)::text AS tsname,
Expand All @@ -120,4 +120,4 @@ CREATE VIEW dashboard_stats AS
AND s.datetime_end IS NOT NULL
AND r.state::text != 'null';

GRANT SELECT ON dashboard_stats TO dashboard;
--GRANT SELECT ON dashboard_stats TO dashboard;
4 changes: 2 additions & 2 deletions openaddr/ci/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from . import (
db_connect, db_queue, db_queue, pop_task_from_taskqueue,
DONE_QUEUE, TASK_QUEUE, DUE_QUEUE, setup_logger, HEARTBEAT_QUEUE,
DONE_QUEUE, TASK_QUEUE_2, DUE_QUEUE, setup_logger, HEARTBEAT_QUEUE,
log_function_errors
)

Expand Down Expand Up @@ -58,7 +58,7 @@ def main():

try:
with db_connect(args.database_url) as conn:
task_Q = db_queue(conn, TASK_QUEUE)
task_Q = db_queue(conn, TASK_QUEUE_2)
done_Q = db_queue(conn, DONE_QUEUE)
due_Q = db_queue(conn, DUE_QUEUE)
beat_Q = db_queue(conn, HEARTBEAT_QUEUE)
Expand Down
2 changes: 1 addition & 1 deletion openaddr/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def iterate_sources_dir(sources_dir):
_, ext = splitext(filename.lower())
if ext == '.json':
path = relpath(join(dirname, filename), sources_dir)
yield normalize('NFC', path)
yield path # yield normalize('NFC', path)

def load_fake_state(sources_dir):
'''
Expand Down
23 changes: 10 additions & 13 deletions openaddr/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from os.path import dirname, join, basename, exists, splitext
from contextlib import contextmanager
from subprocess import Popen, PIPE
from unicodedata import normalize
from threading import Lock

if sys.platform != 'win32':
Expand Down Expand Up @@ -987,19 +988,15 @@ def test_single_fr_paris(self):
def test_single_fr_lareunion(self):
''' Test complete process_one.process on data that uses non-UTF8 encoding (issue #136)
'''
# Common encoding of la-réunion uses U+00E9:
# http://www.fileformat.info/info/unicode/char/e9/index.htm
filename_00E9 = b'fr/la-r\xc3\xa9union.json'.decode('utf8')

# Less-common encoding of la-réunion uses combining character U+0301:
# http://www.fileformat.info/info/unicode/char/0301/index.htm
filename_0301 = b'fr/la-re\xcc\x81union.json'.decode('utf8')

if os.path.exists(join(self.src_dir, filename_00E9)):
source = join(self.src_dir, filename_00E9)
elif os.path.exists(join(self.src_dir, filename_0301)):
source = join(self.src_dir, filename_0301)
else:
source = None

for form in ('NFC', 'NFD'):
normalized = normalize(form, u'fr/la-réunion.json')
if os.path.exists(join(self.src_dir, normalized)):
source = join(self.src_dir, normalized)
break

if source is None:
raise Exception('Could not find a usable fr/la-réunion.json')

with HTTMock(self.response_content):
Expand Down