NOTE(review): this patch was reconstructed from a whitespace-flattened copy.
Line breaks follow the hunk line counts. The indentation of context and
removed lines could not be recovered and is assumed (4 spaces in the SQL
files), so it presumably needs `git apply --ignore-whitespace` -- confirm
against the real files. The `index` blob ids are kept from the original and
do not describe the edited post-images.

diff --git a/examples/finnish_addresses/README.md b/examples/finnish_addresses/README.md
index 1b05aec2..52bd53bc 100644
--- a/examples/finnish_addresses/README.md
+++ b/examples/finnish_addresses/README.md
@@ -1,19 +1,14 @@
 # Simple addresses of Buildings of Finland
 
-The goal of this example project is to create an OGC API Features service with hakunapi to provide nationwide addresses of the buildings in Finland available freely from https://www.avoindata.fi/data/fi/dataset/rakennusten-osoitetiedot-koko-suomi.
+The goal of this example project is to create an OGC API Features service with hakunapi to provide nationwide addresses of the buildings in Finland available freely from https://github.com/ubigu/finnish-open-addresses.
 
 ## Processing the data
 
 ### Getting the data
 
-Begin by downloading the .7z file containing all required data from https://www.avoindata.fi/data/fi/dataset/rakennusten-osoitetiedot-koko-suomi
+Begin by downloading the data from https://github.com/ubigu/finnish-open-addresses/releases (the latest release is best) as either a zip or a tar.gz archive.
 
-Extract the file the 7zip file using your favourite tool and convert the file from latin-1 to utf-8 encoding, for example
-
-```
-7z x suomi_osoitteet_2023-02-13.7z
-iconv -f ISO-8859-1 -t UTF-8 Suomi_osoitteet_2023-02-13.OPT > suomi_osoitteet.csv
-```
+Extract the archive using your favourite tool and move its `data` directory into this directory (the `data` directory is located inside the extracted `finnish-open-addresses-<version>` directory).
 
 ### Setting up the database
 
@@ -29,38 +24,30 @@ CREATE EXTENSION postgis;
 
 ### Load the data into the database
 
-Create the tables to represent the raw data and load the data in
+Create the tables to load the data into
 
 ```
 psql -d address_fin -f create_tables.sql
-psql -d address_fin -c "\copy suomi_osoitteet (
-rakennustunnus,
-sijaintikunta,
-maakunta,
-kayttotarkoitus,
-pohjoiskoordinaatti,
-itakoordinaatti,
-osoitenumero,
-kadunnimi_suomi,
-kadunnimi_ruotsi,
-katunumero,
-postinumero,
-aanestysalue,
-aanestysalue_nimi_suomi,
-aanestysalue_nimi_ruotsi,
-sijaintikiinteisto,
-tietojen_poimintapaiva
-) FROM 'suomi_osoitteet.csv' WITH (FORMAT CSV, DELIMITER ';')"
-
-psql -d address_fin -c "\copy suomi_kunnat (sijaintikunta, nimi_suomi) FROM 'suomi_kunnat.csv' WITH (FORMAT CSV, DELIMITER ';', HEADER)"
+psql -d address_fin -c "\copy suomi_kunnat (municipality_number, name_fin) FROM 'suomi_kunnat.csv' WITH (FORMAT CSV, HEADER)"
+
+cut -d, -f 1 suomi_kunnat.csv | tail -n +2 | while read -r municipality; do psql -d address_fin -c "\copy finnish_open_address (
+  permanent_building_identifier,
+  address_index,
+  east,
+  north,
+  postal_code,
+  address,
+  address_number,
+  municipality_number,
+  property_identifier
+) FROM 'data/${municipality}.csv' WITH (FORMAT CSV)"; done
 ```
 
 ### Transform raw data
 
-* Create unique primary key index by combining rakennustunnus with osoitenumero
-* Select thoroughfarename from kadunnimi_suomi, kadunnimi_ruotsi
+* Create unique primary key index by combining permanent_building_identifier with address_index
 * Copy finnish municipality name over
-* Filter out rows with no finnish or swedish thoroughfarenames
+* Filter out rows with missing coordinates or postal code
 
 ```
 psql -d address_fin -f simple_addresses.sql
diff --git a/examples/finnish_addresses/create_tables.sql b/examples/finnish_addresses/create_tables.sql
index 68bf3aab..53a95195 100644
--- a/examples/finnish_addresses/create_tables.sql
+++ b/examples/finnish_addresses/create_tables.sql
@@ -1,31 +1,24 @@
--- DROP statements commented out to avoid dropping tables user didn't expect to be dropped
---DROP TABLE IF EXISTS suomi_osoitteet;
-CREATE TABLE suomi_osoitteet (
-    rakennustunnus varchar(10),
-    sijaintikunta char(3),
-    maakunta char(2),
-    kayttotarkoitus char(1),
-    pohjoiskoordinaatti integer,
-    itakoordinaatti integer,
-    osoitenumero smallint,
-    kadunnimi_suomi varchar(100),
-    kadunnimi_ruotsi varchar(100),
-    katunumero varchar(13),
-    postinumero char(5),
-    aanestysalue char(4),
-    aanestysalue_nimi_suomi varchar(50),
-    aanestysalue_nimi_ruotsi varchar(50),
-    sijaintikiinteisto varchar(20),
-    tietojen_poimintapaiva varchar(20)
+-- WARNING: this script drops and recreates the tables below, so any rows already loaded into them are lost.
+DROP TABLE IF EXISTS finnish_open_address;
+CREATE TABLE finnish_open_address (
+    permanent_building_identifier varchar(10),
+    address_index smallint,
+    east integer,
+    north integer,
+    postal_code char(5),
+    address varchar(100),
+    address_number varchar(13),
+    municipality_number char(3),
+    property_identifier varchar(14)
 );
 
---DROP TABLE IF EXISTS suomi_kunnat;
+DROP TABLE IF EXISTS suomi_kunnat;
 CREATE TABLE suomi_kunnat (
-    sijaintikunta char(3) PRIMARY KEY,
-    nimi_suomi varchar(50)
+    municipality_number char(3) PRIMARY KEY,
+    name_fin varchar(50)
 );
 
---DROP TABLE IF EXISTS simple_addresses;
+DROP TABLE IF EXISTS simple_addresses;
 CREATE TABLE simple_addresses (
     id varchar(12) PRIMARY KEY,
     geom geometry(Point, 3067) NOT NULL,
diff --git a/examples/finnish_addresses/simple_addresses.sql b/examples/finnish_addresses/simple_addresses.sql
index 30acf378..6207cb3c 100644
--- a/examples/finnish_addresses/simple_addresses.sql
+++ b/examples/finnish_addresses/simple_addresses.sql
@@ -9,14 +9,18 @@ INSERT INTO simple_addresses (
     parcel
 )
 SELECT
-    concat(rakennustunnus, '-', osoitenumero),
-    ST_SetSRID(ST_MakePoint(itakoordinaatti, pohjoiskoordinaatti), 3067),
-    COALESCE(kadunnimi_suomi, kadunnimi_ruotsi),
-    postinumero,
-    nimi_suomi,
-    katunumero,
-    rakennustunnus,
-    sijaintikiinteisto
-FROM suomi_osoitteet
-JOIN suomi_kunnat USING (sijaintikunta)
-WHERE kadunnimi_suomi IS NOT NULL OR kadunnimi_ruotsi IS NOT NULL
+    concat(permanent_building_identifier, '-', address_index),
+    -- ST_SetSRID(ST_MakePoint(...)) rather than ST_Point(x, y, srid): the srid argument of ST_Point needs PostGIS 3.2+
+    ST_SetSRID(ST_MakePoint(east, north), 3067),
+    address,
+    postal_code,
+    name_fin,
+    address_number,
+    permanent_building_identifier,
+    property_identifier
+FROM finnish_open_address
+JOIN suomi_kunnat USING (municipality_number)
+-- TODO Remove these after the original dataset has been improved
+WHERE east IS NOT NULL
+AND north IS NOT NULL
+AND postal_code IS NOT NULL;