diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..8210b1b
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,6 @@
+# These are supported funding model platforms
+
+github: [mvexel]
+patreon: mvexel
+ko_fi: mvexel
+liberapay: mvexel
diff --git a/.gitignore b/.gitignore
index 7e99e36..2a7fe04 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
-*.pyc
\ No newline at end of file
+*.pyc
+.DS_Store
+venv/
+__pycache__/
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1dae9ab
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,14 @@
+Copyright (C) 2012 Toby Murray
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
\ No newline at end of file
diff --git a/README.md b/README.md
index bc2f582..3703eb0 100644
--- a/README.md
+++ b/README.md
@@ -8,11 +8,13 @@ It can also keep a database created with a weekly dump file up to date using min
Setup
------------
-ChangesetMD works with python 2.7.
+ChangesetMD works with Python 3.6 or newer.
Aside from postgresql, ChangesetMD depends on the python libraries psycopg2 and lxml.
On Debian-based systems this means installing the python-psycopg2 and python-lxml packages.
+If you are using `pip` and `virtualenv`, you can install all dependencies with `pip install -r requirements.txt`.
+
If you want to parse the changeset file without first unzipping it, you will also need to install the [bz2file library](http://pypi.python.org/pypi/bz2file) since the built in bz2 library can not handle multi-stream bzip files.
For building geometries, ```postgis``` extension needs to be [installed](http://postgis.net/install).
@@ -26,21 +28,45 @@ It is easiest if your OS user has access to this database. I just created a user
createuser <username>
+Full Debian build instructions
+------------------------------
+
+ sudo apt install sudo screen locate git tar unzip wget bzip2 apache2 python3-psycopg2 python3-yaml libpq-dev postgresql postgresql-contrib postgis postgresql-15-postgis-3 postgresql-15-postgis-3-scripts net-tools curl python3-full gcc libpython3.11-dev libxml2-dev libxslt-dev
+
+ python3 -m venv .venv
+ source .venv/bin/activate
+ pip install -r requirements.txt
+
+ sudo -u postgres -i
+ createuser youruseraccount
+ createdb -E UTF8 -O youruseraccount changesets
+
+ psql
+ \c changesets
+ CREATE EXTENSION postgis;
+ ALTER TABLE geometry_columns OWNER TO youruseraccount;
+ ALTER TABLE spatial_ref_sys OWNER TO youruseraccount;
+ \q
+ exit
+
+
Execution
------------
The first time you run it, you will need to include the -c | --create option to create the table:
- python changesetmd.py -d <database> -c
+ python changesetmd.py -d <database> -c -g
+
+The `-g` | `--geometry` argument is optional and builds polygon geometries for changesets so that you can query which changesets fall within a given area.
The create function can be combined with the file option to immediately parse a file.
To parse a dump file, use the -f | --file option.
- python changesetmd.py -d <database> -f /tmp/changeset-latest.osm
+ python changesetmd.py -d <database> -g -f /tmp/discussions-latest.osm.bz2
If no other arguments are given, it will access postgres using the default settings of the postgres client, typically connecting on the unix socket as the current OS user. Use the ```--help``` argument to see optional arguments for connecting to postgres.
-You can add the `-g` | `--geometry` option to build polygon geometries (the database also needs to be created with this option).
+Again, the `-g` | `--geometry` argument is optional. Either changeset-latest.osm.bz2 or discussions-latest.osm.bz2 can be used to populate the database; you can also create the tables without loading a dump at all.
Replication
------------
@@ -122,13 +148,3 @@ Find all changesets that were created in Liberty Island:
SELECT count(id)
FROM osm_changeset c, (SELECT ST_SetSRID(ST_MakeEnvelope(-74.0474545,40.6884971,-74.0433990,40.6911817),4326) AS geom) s
WHERE ST_CoveredBy(c.geom, s.geom);
-
-License
-------------
-Copyright (C) 2012 Toby Murray
-
-This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
-See the GNU Affero General Public License for more details: http://www.gnu.org/licenses/agpl.txt
diff --git a/changesetmd.py b/changesetmd.py
index 18d2e25..4b90962 100755
--- a/changesetmd.py
+++ b/changesetmd.py
@@ -1,48 +1,50 @@
-#!/usr/bin/python
-'''
+#!/usr/bin/env python
+"""
ChangesetMD is a simple XML parser to read the weekly changeset metadata dumps
from OpenStreetmap into a postgres database for querying.
@author: Toby Murray
-'''
+"""
-import os
import sys
import argparse
+import gzip
+from datetime import datetime
+from datetime import timedelta
import psycopg2
import psycopg2.extras
import queries
-import gzip
-import urllib2
+import requests
import yaml
from lxml import etree
-from datetime import datetime
-from datetime import timedelta
-from StringIO import StringIO
try:
from bz2file import BZ2File
+
bz2Support = True
except ImportError:
bz2Support = False
-BASE_REPL_URL = "http://planet.openstreetmap.org/replication/changesets/"
+BASE_REPL_URL = "https://planet.openstreetmap.org/replication/changesets/"
+
-class ChangesetMD():
+class ChangesetMD:
def __init__(self, createGeometry):
self.createGeometry = createGeometry
def truncateTables(self, connection):
- print 'truncating tables'
+ print("truncating tables")
cursor = connection.cursor()
cursor.execute("TRUNCATE TABLE osm_changeset_comment CASCADE;")
cursor.execute("TRUNCATE TABLE osm_changeset CASCADE;")
cursor.execute(queries.dropIndexes)
- cursor.execute("UPDATE osm_changeset_state set last_sequence = -1, last_timestamp = null, update_in_progress = 0")
+ cursor.execute(
+ "UPDATE osm_changeset_state set last_sequence = -1, last_timestamp = null, update_in_progress = 0"
+ )
connection.commit()
def createTables(self, connection):
- print 'creating tables'
+ print("creating tables")
cursor = connection.cursor()
cursor.execute(queries.createChangesetTable)
cursor.execute(queries.initStateTable)
@@ -50,171 +52,318 @@ def createTables(self, connection):
cursor.execute(queries.createGeometryColumn)
connection.commit()
- def insertNew(self, connection, id, userId, createdAt, minLat, maxLat, minLon, maxLon, closedAt, open, numChanges, userName, tags, comments):
+ def insertNewBatch(self, connection, data_arr):
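+ # Bulk-insert changeset rows; psycopg2's execute_batch sends them in batches,
+ # cutting server round trips compared to inserting one row at a time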
cursor = connection.cursor()
if self.createGeometry:
- cursor.execute('''INSERT into osm_changeset
+ sql = """INSERT into osm_changeset
(id, user_id, created_at, min_lat, max_lat, min_lon, max_lon, closed_at, open, num_changes, user_name, tags, geom)
- values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,ST_SetSRID(ST_MakeEnvelope(%s,%s,%s,%s), 4326))''',
- (id, userId, createdAt, minLat, maxLat, minLon, maxLon, closedAt, open, numChanges, userName, tags, minLon, minLat, maxLon, maxLat))
+ values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,ST_SetSRID(ST_MakeEnvelope(%s,%s,%s,%s), 4326))"""
+ psycopg2.extras.execute_batch(cursor, sql, data_arr)
+ cursor.close()
else:
- cursor.execute('''INSERT into osm_changeset
+ sql = """INSERT into osm_changeset
(id, user_id, created_at, min_lat, max_lat, min_lon, max_lon, closed_at, open, num_changes, user_name, tags)
- values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',
- (id, userId, createdAt, minLat, maxLat, minLon, maxLon, closedAt, open, numChanges, userName, tags))
- for comment in comments:
- cursor.execute('''INSERT into osm_changeset_comment
+ values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
+ psycopg2.extras.execute_batch(cursor, sql, data_arr)
+ cursor.close()
+
+ def insertNewBatchComment(self, connection, comment_arr):
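+ # Discussion comments are batch-inserted the same way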
+ cursor = connection.cursor()
+ sql = """INSERT into osm_changeset_comment
(comment_changeset_id, comment_user_id, comment_user_name, comment_date, comment_text)
- values (%s,%s,%s,%s,%s)''',
- (id, comment['uid'], comment['user'], comment['date'], comment['text']))
+ values (%s,%s,%s,%s,%s)"""
+ psycopg2.extras.execute_batch(cursor, sql, comment_arr)
+ cursor.close()
def deleteExisting(self, connection, id):
cursor = connection.cursor()
- cursor.execute('''DELETE FROM osm_changeset_comment
- WHERE comment_changeset_id = %s''', (id,))
- cursor.execute('''DELETE FROM osm_changeset
- WHERE id = %s''', (id,))
+ cursor.execute(
+ """DELETE FROM osm_changeset_comment
+ WHERE comment_changeset_id = %s""",
+ (id,),
+ )
+ cursor.execute(
+ """DELETE FROM osm_changeset
+ WHERE id = %s""",
+ (id,),
+ )
def parseFile(self, connection, changesetFile, doReplication):
parsedCount = 0
startTime = datetime.now()
cursor = connection.cursor()
context = etree.iterparse(changesetFile)
- action, root = context.next()
+ action, root = next(context)
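+ # Rows are accumulated in these lists and written to the database in batches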
+ changesets = []
+ comments = []
for action, elem in context:
- if(elem.tag != 'changeset'):
+ if elem.tag != "changeset":
continue
parsedCount += 1
tags = {}
- for tag in elem.iterchildren(tag='tag'):
- tags[tag.attrib['k']] = tag.attrib['v']
-
- comments = []
- for discussion in elem.iterchildren(tag='discussion'):
- for commentElement in discussion.iterchildren(tag='comment'):
- comment = dict()
- comment['uid'] = commentElement.attrib.get('uid')
- comment['user'] = commentElement.attrib.get('user')
- comment['date'] = commentElement.attrib.get('date')
- for text in commentElement.iterchildren(tag='text'):
- comment['text'] = text.text
+ for tag in elem.iterchildren(tag="tag"):
+ tags[tag.attrib["k"]] = tag.attrib["v"]
+
+ for discussion in elem.iterchildren(tag="discussion"):
+ for commentElement in discussion.iterchildren(tag="comment"):
+ comment_text = None
+ for text in commentElement.iterchildren(tag="text"):
+ comment_text = text.text
+ comment = (
+ elem.attrib["id"],
+ commentElement.attrib.get("uid"),
+ commentElement.attrib.get("user"),
+ commentElement.attrib.get("date"),
+ comment_text,
+ )
comments.append(comment)
- if(doReplication):
- self.deleteExisting(connection, elem.attrib['id'])
-
- self.insertNew(connection, elem.attrib['id'], elem.attrib.get('uid', None),
- elem.attrib['created_at'], elem.attrib.get('min_lat', None),
- elem.attrib.get('max_lat', None), elem.attrib.get('min_lon', None),
- elem.attrib.get('max_lon', None),elem.attrib.get('closed_at', None),
- elem.attrib.get('open', None), elem.attrib.get('num_changes', None),
- elem.attrib.get('user', None), tags, comments)
-
- if((parsedCount % 10000) == 0):
- print "parsed %s" % ('{:,}'.format(parsedCount))
- print "cumulative rate: %s/sec" % '{:,.0f}'.format(parsedCount/timedelta.total_seconds(datetime.now() - startTime))
-
- #clear everything we don't need from memory to avoid leaking
+ if doReplication:
+ self.deleteExisting(connection, elem.attrib["id"])
+
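+ # With geometries enabled, the bounding box values are appended a second
+ # time so the INSERT above can feed them to ST_MakeEnvelope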
+ if self.createGeometry:
+ changesets.append(
+ (
+ elem.attrib["id"],
+ elem.attrib.get("uid", None),
+ elem.attrib["created_at"],
+ elem.attrib.get("min_lat", None),
+ elem.attrib.get("max_lat", None),
+ elem.attrib.get("min_lon", None),
+ elem.attrib.get("max_lon", None),
+ elem.attrib.get("closed_at", None),
+ elem.attrib.get("open", None),
+ elem.attrib.get("num_changes", None),
+ elem.attrib.get("user", None),
+ tags,
+ elem.attrib.get("min_lon", None),
+ elem.attrib.get("min_lat", None),
+ elem.attrib.get("max_lon", None),
+ elem.attrib.get("max_lat", None),
+ )
+ )
+ else:
+ changesets.append(
+ (
+ elem.attrib["id"],
+ elem.attrib.get("uid", None),
+ elem.attrib["created_at"],
+ elem.attrib.get("min_lat", None),
+ elem.attrib.get("max_lat", None),
+ elem.attrib.get("min_lon", None),
+ elem.attrib.get("max_lon", None),
+ elem.attrib.get("closed_at", None),
+ elem.attrib.get("open", None),
+ elem.attrib.get("num_changes", None),
+ elem.attrib.get("user", None),
+ tags,
+ )
+ )
+
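+ # Every 100,000 changesets, flush the accumulated batches and print progress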
+ if (parsedCount % 100000) == 0:
+ self.insertNewBatch(connection, changesets)
+ self.insertNewBatchComment(connection, comments)
+ changesets = []
+ comments = []
+ print("parsed {}".format(("{:,}".format(parsedCount))))
+ print(
+ "cumulative rate: {}/sec".format(
+ "{:,.0f}".format(
+ parsedCount
+ / timedelta.total_seconds(datetime.now() - startTime)
+ )
+ )
+ )
+
+ # clear everything we don't need from memory to avoid leaking
elem.clear()
while elem.getprevious() is not None:
del elem.getparent()[0]
+ # Update whatever is left, then commit
+ self.insertNewBatch(connection, changesets)
+ self.insertNewBatchComment(connection, comments)
connection.commit()
- print "parsing complete"
- print "parsed {:,}".format(parsedCount)
+ print("parsing complete")
+ print("parsed {:,}".format(parsedCount))
def fetchReplicationFile(self, sequenceNumber):
- topdir = format(sequenceNumber / 1000000, '003')
- subdir = format((sequenceNumber / 1000) % 1000, '003')
- fileNumber = format(sequenceNumber % 1000, '003')
- fileUrl = BASE_REPL_URL + topdir + '/' + subdir + '/' + fileNumber + '.osm.gz'
- print "opening replication file at " + fileUrl
- replicationFile = urllib2.urlopen(fileUrl)
- replicationData = StringIO(replicationFile.read())
- return gzip.GzipFile(fileobj=replicationData)
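+ # Replication files are laid out as AAA/BBB/CCC.osm.gz, where AAABBBCCC is the
+ # sequence number zero-padded to nine digits (e.g. 4417826 -> 004/417/826.osm.gz)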
+ sequenceNumber = str(sequenceNumber).zfill(9)
+ topdir = str(sequenceNumber)[:3]
+ subdir = str(sequenceNumber)[3:6]
+ fileNumber = str(sequenceNumber)[-3:]
+ fileUrl = BASE_REPL_URL + topdir + "/" + subdir + "/" + fileNumber + ".osm.gz"
+ print("opening replication file at " + fileUrl)
+ replicationFile = requests.get(fileUrl, stream=True)
+ replicationData = replicationFile.raw
+ f = gzip.GzipFile(fileobj=replicationData)
+ return f
def doReplication(self, connection):
cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
try:
- cursor.execute('LOCK TABLE osm_changeset_state IN ACCESS EXCLUSIVE MODE NOWAIT')
+ cursor.execute(
+ "LOCK TABLE osm_changeset_state IN ACCESS EXCLUSIVE MODE NOWAIT"
+ )
except psycopg2.OperationalError as e:
- print "error getting lock on state table. Another process might be running"
+ print("error getting lock on state table. Another process might be running")
return 1
- cursor.execute('select * from osm_changeset_state')
+ cursor.execute("select * from osm_changeset_state")
dbStatus = cursor.fetchone()
- lastDbSequence = dbStatus['last_sequence']
+ lastDbSequence = dbStatus["last_sequence"]
timestamp = None
lastServerTimestamp = None
newTimestamp = None
- if(dbStatus['last_timestamp'] is not None):
- timestamp = dbStatus['last_timestamp']
- print "latest timestamp in database: " + str(timestamp)
- if(dbStatus['update_in_progress'] == 1):
- print "concurrent update in progress. Bailing out!"
+ if dbStatus["last_timestamp"] is not None:
+ timestamp = dbStatus["last_timestamp"]
+ print("latest timestamp in database: " + str(timestamp))
+ if dbStatus["update_in_progress"] == 1:
+ print("concurrent update in progress. Bailing out!")
return 1
- if(lastDbSequence == -1):
- print "replication state not initialized. You must set the sequence number first."
+ if lastDbSequence == -1:
+ print(
+ "replication state not initialized. You must set the sequence number first."
+ )
return 1
- cursor.execute('update osm_changeset_state set update_in_progress = 1')
+ cursor.execute("update osm_changeset_state set update_in_progress = 1")
connection.commit()
print("latest sequence from the database: " + str(lastDbSequence))
- #No matter what happens after this point, execution needs to reach the update statement
- #at the end of this method to unlock the database or an error will forever leave it locked
+ # No matter what happens after this point, execution needs to reach the update statement
+ # at the end of this method to unlock the database or an error will forever leave it locked
returnStatus = 0
try:
- serverState = yaml.load(urllib2.urlopen(BASE_REPL_URL + "state.yaml"))
- lastServerSequence = serverState['sequence']
- print "got sequence"
- lastServerTimestamp = serverState['last_run']
- print "last timestamp on server: " + str(lastServerTimestamp)
+ serverState = yaml.full_load(requests.get(BASE_REPL_URL + "state.yaml").text)
+ lastServerSequence = serverState["sequence"]
+ print("got sequence")
+ lastServerTimestamp = serverState["last_run"]
+ print("last timestamp on server: " + str(lastServerTimestamp))
except Exception as e:
- print "error retrieving server state file. Bailing on replication"
- print e
+ print("error retrieving server state file. Bailing on replication")
+ print(e)
returnStatus = 2
else:
try:
print("latest sequence on OSM server: " + str(lastServerSequence))
- if(lastServerSequence > lastDbSequence):
+ if lastServerSequence > lastDbSequence:
print("server has new sequence. commencing replication")
currentSequence = lastDbSequence + 1
- while(currentSequence <= lastServerSequence):
- self.parseFile(connection, self.fetchReplicationFile(currentSequence), True)
- cursor.execute('update osm_changeset_state set last_sequence = %s', (currentSequence,))
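+ # Fetch and apply every sequence between the database state and the server's newest, committing after each file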
+ while currentSequence <= lastServerSequence:
+ self.parseFile(
+ connection, self.fetchReplicationFile(currentSequence), True
+ )
+ cursor.execute(
+ "update osm_changeset_state set last_sequence = %s",
+ (currentSequence,),
+ )
connection.commit()
currentSequence += 1
timestamp = lastServerTimestamp
print("finished with replication. Clearing status record")
except Exception as e:
- print "error during replication"
- print e
+ print("error during replication")
+ print(e)
returnStatus = 2
- cursor.execute('update osm_changeset_state set update_in_progress = 0, last_timestamp = %s', (timestamp,))
+ cursor.execute(
+ "update osm_changeset_state set update_in_progress = 0, last_timestamp = %s",
+ (timestamp,),
+ )
connection.commit()
return returnStatus
-if __name__ == '__main__':
+
+if __name__ == "__main__":
beginTime = datetime.now()
endTime = None
timeCost = None
- argParser = argparse.ArgumentParser(description="Parse OSM Changeset metadata into a database")
- argParser.add_argument('-t', '--trunc', action='store_true', default=False, dest='truncateTables', help='Truncate existing tables (also drops indexes)')
- argParser.add_argument('-c', '--create', action='store_true', default=False, dest='createTables', help='Create tables')
- argParser.add_argument('-H', '--host', action='store', dest='dbHost', help='Database hostname')
- argParser.add_argument('-P', '--port', action='store', dest='dbPort', default=None, help='Database port')
- argParser.add_argument('-u', '--user', action='store', dest='dbUser', default=None, help='Database username')
- argParser.add_argument('-p', '--password', action='store', dest='dbPass', default=None, help='Database password')
- argParser.add_argument('-d', '--database', action='store', dest='dbName', help='Target database', required=True)
- argParser.add_argument('-f', '--file', action='store', dest='fileName', help='OSM changeset file to parse')
- argParser.add_argument('-r', '--replicate', action='store_true', dest='doReplication', default=False, help='Apply a replication file to an existing database')
- argParser.add_argument('-g', '--geometry', action='store_true', dest='createGeometry', default=False, help='Build geometry of changesets (requires postgis)')
+ argParser = argparse.ArgumentParser(
+ description="Parse OSM Changeset metadata into a database"
+ )
+ argParser.add_argument(
+ "-t",
+ "--trunc",
+ action="store_true",
+ default=False,
+ dest="truncateTables",
+ help="Truncate existing tables (also drops indexes)",
+ )
+ argParser.add_argument(
+ "-c",
+ "--create",
+ action="store_true",
+ default=False,
+ dest="createTables",
+ help="Create tables",
+ )
+ argParser.add_argument(
+ "-H", "--host", action="store", dest="dbHost", help="Database hostname"
+ )
+ argParser.add_argument(
+ "-P",
+ "--port",
+ action="store",
+ dest="dbPort",
+ default=None,
+ help="Database port",
+ )
+ argParser.add_argument(
+ "-u",
+ "--user",
+ action="store",
+ dest="dbUser",
+ default=None,
+ help="Database username",
+ )
+ argParser.add_argument(
+ "-p",
+ "--password",
+ action="store",
+ dest="dbPass",
+ default=None,
+ help="Database password",
+ )
+ argParser.add_argument(
+ "-d",
+ "--database",
+ action="store",
+ dest="dbName",
+ help="Target database",
+ required=True,
+ )
+ argParser.add_argument(
+ "-f",
+ "--file",
+ action="store",
+ dest="fileName",
+ help="OSM changeset file to parse",
+ )
+ argParser.add_argument(
+ "-r",
+ "--replicate",
+ action="store_true",
+ dest="doReplication",
+ default=False,
+ help="Apply a replication file to an existing database",
+ )
+ argParser.add_argument(
+ "-g",
+ "--geometry",
+ action="store_true",
+ dest="createGeometry",
+ default=False,
+ help="Build geometry of changesets (requires postgis)",
+ )
args = argParser.parse_args()
- conn = psycopg2.connect(database=args.dbName, user=args.dbUser, password=args.dbPass, host=args.dbHost, port=args.dbPort)
-
+ conn = psycopg2.connect(
+ database=args.dbName,
+ user=args.dbUser,
+ password=args.dbPass,
+ host=args.dbHost,
+ port=args.dbPort,
+ )
md = ChangesetMD(args.createGeometry)
if args.truncateTables:
@@ -225,39 +374,43 @@ def doReplication(self, connection):
psycopg2.extras.register_hstore(conn)
- if(args.doReplication):
+ if args.doReplication:
returnStatus = md.doReplication(conn)
sys.exit(returnStatus)
if not (args.fileName is None):
if args.createGeometry:
- print 'parsing changeset file with geometries'
+ print("parsing changeset file with geometries")
else:
- print 'parsing changeset file'
+ print("parsing changeset file")
changesetFile = None
- if(args.doReplication):
- changesetFile = gzip.open(args.fileName, 'rb')
+ if args.doReplication:
+ changesetFile = gzip.open(args.fileName, "rb")
else:
- if(args.fileName[-4:] == '.bz2'):
- if(bz2Support):
+ if args.fileName[-4:] == ".bz2":
+ if bz2Support:
changesetFile = BZ2File(args.fileName)
else:
- print 'ERROR: bzip2 support not available. Unzip file first or install bz2file'
+ print(
+ "ERROR: bzip2 support not available. Unzip file first or install bz2file"
+ )
sys.exit(1)
else:
- changesetFile = open(args.fileName, 'rb')
+ changesetFile = open(args.fileName, "rb")
- if(changesetFile != None):
+ if changesetFile is not None:
md.parseFile(conn, changesetFile, args.doReplication)
else:
- print 'ERROR: no changeset file opened. Something went wrong in processing args'
+ print(
+ "ERROR: no changeset file opened. Something went wrong in processing args"
+ )
sys.exit(1)
- if(not args.doReplication):
+ if not args.doReplication:
cursor = conn.cursor()
- print 'creating constraints'
+ print("creating constraints")
cursor.execute(queries.createConstraints)
- print 'creating indexes'
+ print("creating indexes")
cursor.execute(queries.createIndexes)
if args.createGeometry:
cursor.execute(queries.createGeomIndex)
@@ -268,6 +421,6 @@ def doReplication(self, connection):
endTime = datetime.now()
timeCost = endTime - beginTime
- print 'Processing time cost is ', timeCost
+ print("Processing time cost is ", timeCost)
- print 'All done. Enjoy your (meta)data!'
+ print("All done. Enjoy your (meta)data!")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1904ef7
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+bz2file==0.98
+lxml==4.9.3
+psycopg2-binary==2.9.9
+PyYAML==6.0.1
+requests==2.31.0
+urllib3==1.26.18
\ No newline at end of file