From 671685c66cb7793d7ce7bf658b938fc6e9ff0054 Mon Sep 17 00:00:00 2001 From: Wesley Batista Date: Mon, 7 Dec 2020 21:17:42 -0300 Subject: [PATCH 1/3] add upload script --- b3_data/upload.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 b3_data/upload.py diff --git a/b3_data/upload.py b/b3_data/upload.py new file mode 100644 index 0000000..0fa7bd6 --- /dev/null +++ b/b3_data/upload.py @@ -0,0 +1,83 @@ +from sqlalchemy import create_engine +from sqlalchemy import Column, String, BigInteger, Float +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm.session import sessionmaker +from zipfile import ZipFile +import csv +from io import TextIOWrapper + + +FIELDNAMES = 'RptDt;TckrSymb;UpdActn;GrssTradAmt;TradQty;NtryTm;TradId;TradgSsnId;TradDt' # noqa +Base = declarative_base() +Session = sessionmaker() + + +class TickerCsv(Base): + __tablename__ = 'tickercsv' + + _id = Column(BigInteger, name='id', primary_key=True) + RptDt = Column(String, name='RptDt') + TckrSymb = Column(String, name='TckrSymb') + UpdActn = Column(BigInteger, name='UpdActn') + GrssTradAmt = Column(Float, name='GrssTradAmt') + TradQty = Column(BigInteger, name='TradQty') + NtryTm = Column(BigInteger, name='NtryTm') + TradId = Column(BigInteger, name='TradId') + TradgSsnId = Column(BigInteger, name='TradgSsnId') + TradDt = Column(String, name='TradDt') + + @classmethod + def from_tuple(cls, *args): + asdict = dict([x for x in zip(FIELDNAMES.split(';'), args)]) + return cls(**asdict) + + +def _make_sure_table_exists(engine): + Base.metadata.create_all(engine) + + +def parse_row(**columns): + if 'GrssTradAmt' in columns: + columns['GrssTradAmt'] = float(columns['GrssTradAmt'].replace(',', '.')) + return TickerCsv(**columns) + + +def _get_csv_reader(contents): + return csv.DictReader(contents, fieldnames=FIELDNAMES.split(';'), delimiter=';') # noqa + + +def _csv_in_chunks(reader, chunksize): + chunk = [] + for i, line in enumerate(reader): + if (i % chunksize == 0 and i > 0): + yield chunk + chunk = [] + chunk.append(line) + yield chunk + + +def _get_csv_file_stream_reader(filepath, date): + zip_csv_filepath = f"TradeIntraday_{date.replace('-', '')}_1.txt" + with ZipFile(filepath) as zip_archive: + with zip_archive.open(zip_csv_filepath, 'r') as infile: + reader = _get_csv_reader(TextIOWrapper(infile, 'utf-8')) + for row in reader: + yield row + + +def upload(dsn, chunk_size, date): + engine = create_engine(dsn) + _make_sure_table_exists(engine) + Session.configure(bind=engine) + s = Session() + filepath = f"{date}.zip" + csv_reader = _get_csv_file_stream_reader(filepath, date) + processed_rows = 0 + for chunk_idx, chunk in enumerate(_csv_in_chunks(csv_reader, chunk_size), 1): + print(f'chunk #{chunk_idx}') + data = [parse_row(**row) for i, row in enumerate(chunk) if i != 0] + s.bulk_save_objects(data) + s.commit() + processed_rows += len(data) + print(f'processed rows: {processed_rows}') + Session.close_all() From 4f78cc11a32b9b5e5629f15d2318fad4ba7cf827 Mon Sep 17 00:00:00 2001 From: Wesley Batista Date: Mon, 7 Dec 2020 21:19:54 -0300 Subject: [PATCH 2/3] update deps --- poetry.lock | 87 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 ++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index d4a7545..7345aab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -30,6 +30,14 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "psycopg2" +version = "2.8.6" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" + [[package]] name = "requests" version = "2.25.0" @@ -48,6 +56,26 @@ urllib3 = ">=1.21.1,<1.27" security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] +[[package]] +name = "sqlalchemy" +version = "1.3.20" +description = "Database Abstraction Library" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +mssql = ["pyodbc"] +mssql_pymssql = ["pymssql"] +mssql_pyodbc = ["pyodbc"] +mysql = ["mysqlclient"] +oracle = ["cx-oracle"] +postgresql = ["psycopg2"] +postgresql_pg8000 = ["pg8000"] +postgresql_psycopg2binary = ["psycopg2-binary"] +postgresql_psycopg2cffi = ["psycopg2cffi"] +pymysql = ["pymysql"] + [[package]] name = "urllib3" version = "1.26.2" @@ -64,7 +92,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "7fc2614e96f223874ad17fe713302da6bd400d44794729e8a74c8d7d632620d0" +content-hash = "9b99bfd106a071271056817b9e9b6b5bce8a08560a016dad77f20896b0e18756" [metadata.files] certifi = [ @@ -83,10 +111,67 @@ idna = [ {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, ] +psycopg2 = [ + {file = "psycopg2-2.8.6-cp27-cp27m-win32.whl", hash = "sha256:068115e13c70dc5982dfc00c5d70437fe37c014c808acce119b5448361c03725"}, + {file = "psycopg2-2.8.6-cp27-cp27m-win_amd64.whl", hash = "sha256:d160744652e81c80627a909a0e808f3c6653a40af435744de037e3172cf277f5"}, + {file = "psycopg2-2.8.6-cp34-cp34m-win32.whl", hash = "sha256:b8cae8b2f022efa1f011cc753adb9cbadfa5a184431d09b273fb49b4167561ad"}, + {file = "psycopg2-2.8.6-cp34-cp34m-win_amd64.whl", hash = "sha256:f22ea9b67aea4f4a1718300908a2fb62b3e4276cf00bd829a97ab5894af42ea3"}, + {file = "psycopg2-2.8.6-cp35-cp35m-win32.whl", hash = "sha256:26e7fd115a6db75267b325de0fba089b911a4a12ebd3d0b5e7acb7028bc46821"}, + {file = "psycopg2-2.8.6-cp35-cp35m-win_amd64.whl", hash = "sha256:00195b5f6832dbf2876b8bf77f12bdce648224c89c880719c745b90515233301"}, + {file = "psycopg2-2.8.6-cp36-cp36m-win32.whl", hash = "sha256:a49833abfdede8985ba3f3ec641f771cca215479f41523e99dace96d5b8cce2a"}, + {file = "psycopg2-2.8.6-cp36-cp36m-win_amd64.whl", hash = "sha256:f974c96fca34ae9e4f49839ba6b78addf0346777b46c4da27a7bf54f48d3057d"}, + {file = "psycopg2-2.8.6-cp37-cp37m-win32.whl", hash = "sha256:6a3d9efb6f36f1fe6aa8dbb5af55e067db802502c55a9defa47c5a1dad41df84"}, + {file = "psycopg2-2.8.6-cp37-cp37m-win_amd64.whl", hash = "sha256:56fee7f818d032f802b8eed81ef0c1232b8b42390df189cab9cfa87573fe52c5"}, + {file = "psycopg2-2.8.6-cp38-cp38-win32.whl", hash = "sha256:ad2fe8a37be669082e61fb001c185ffb58867fdbb3e7a6b0b0d2ffe232353a3e"}, + {file = "psycopg2-2.8.6-cp38-cp38-win_amd64.whl", hash = "sha256:56007a226b8e95aa980ada7abdea6b40b75ce62a433bd27cec7a8178d57f4051"}, + {file = "psycopg2-2.8.6-cp39-cp39-win32.whl", hash = "sha256:2c93d4d16933fea5bbacbe1aaf8fa8c1348740b2e50b3735d1b0bf8154cbf0f3"}, + {file = "psycopg2-2.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:d5062ae50b222da28253059880a871dc87e099c25cb68acf613d9d227413d6f7"}, + {file = "psycopg2-2.8.6.tar.gz", hash = "sha256:fb23f6c71107c37fd667cb4ea363ddeb936b348bbd6449278eb92c189699f543"}, +] requests = [ {file = "requests-2.25.0-py2.py3-none-any.whl", hash = "sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998"}, {file = "requests-2.25.0.tar.gz", hash = "sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8"}, ] +sqlalchemy = [ + {file = "SQLAlchemy-1.3.20-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bad73f9888d30f9e1d57ac8829f8a12091bdee4949b91db279569774a866a18e"}, + {file = "SQLAlchemy-1.3.20-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:e32e3455db14602b6117f0f422f46bc297a3853ae2c322ecd1e2c4c04daf6ed5"}, + {file = "SQLAlchemy-1.3.20-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:5cdfe54c1e37279dc70d92815464b77cd8ee30725adc9350f06074f91dbfeed2"}, + {file = "SQLAlchemy-1.3.20-cp27-cp27m-win32.whl", hash = "sha256:2e9bd5b23bba8ae8ce4219c9333974ff5e103c857d9ff0e4b73dc4cb244c7d86"}, + {file = "SQLAlchemy-1.3.20-cp27-cp27m-win_amd64.whl", hash = "sha256:5d92c18458a4aa27497a986038d5d797b5279268a2de303cd00910658e8d149c"}, + {file = "SQLAlchemy-1.3.20-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:53fd857c6c8ffc0aa6a5a3a2619f6a74247e42ec9e46b836a8ffa4abe7aab327"}, + {file = "SQLAlchemy-1.3.20-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:0a92745bb1ebbcb3985ed7bda379b94627f0edbc6c82e9e4bac4fb5647ae609a"}, + {file = "SQLAlchemy-1.3.20-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:b6f036ecc017ec2e2cc2a40615b41850dc7aaaea6a932628c0afc73ab98ba3fb"}, + {file = "SQLAlchemy-1.3.20-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:3aa6d45e149a16aa1f0c46816397e12313d5e37f22205c26e06975e150ffcf2a"}, + {file = "SQLAlchemy-1.3.20-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:ed53209b5f0f383acb49a927179fa51a6e2259878e164273ebc6815f3a752465"}, + {file = "SQLAlchemy-1.3.20-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:d3b709d64b5cf064972b3763b47139e4a0dc4ae28a36437757f7663f67b99710"}, + {file = "SQLAlchemy-1.3.20-cp35-cp35m-win32.whl", hash = "sha256:950f0e17ffba7a7ceb0dd056567bc5ade22a11a75920b0e8298865dc28c0eff6"}, + {file = "SQLAlchemy-1.3.20-cp35-cp35m-win_amd64.whl", hash = "sha256:8dcbf377529a9af167cbfc5b8acec0fadd7c2357fc282a1494c222d3abfc9629"}, + {file = "SQLAlchemy-1.3.20-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:0157c269701d88f5faf1fa0e4560e4d814f210c01a5b55df3cab95e9346a8bcc"}, + {file = "SQLAlchemy-1.3.20-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:7cd40cb4bc50d9e87b3540b23df6e6b24821ba7e1f305c1492b0806c33dbdbec"}, + {file = "SQLAlchemy-1.3.20-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:c092fe282de83d48e64d306b4bce03114859cdbfe19bf8a978a78a0d44ddadb1"}, + {file = "SQLAlchemy-1.3.20-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:166917a729b9226decff29416f212c516227c2eb8a9c9f920d69ced24e30109f"}, + {file = "SQLAlchemy-1.3.20-cp36-cp36m-win32.whl", hash = "sha256:632b32183c0cb0053194a4085c304bc2320e5299f77e3024556fa2aa395c2a8b"}, + {file = "SQLAlchemy-1.3.20-cp36-cp36m-win_amd64.whl", hash = "sha256:bbc58fca72ce45a64bb02b87f73df58e29848b693869e58bd890b2ddbb42d83b"}, + {file = "SQLAlchemy-1.3.20-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:b15002b9788ffe84e42baffc334739d3b68008a973d65fad0a410ca5d0531980"}, + {file = "SQLAlchemy-1.3.20-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:9e379674728f43a0cd95c423ac0e95262500f9bfd81d33b999daa8ea1756d162"}, + {file = "SQLAlchemy-1.3.20-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:2b5dafed97f778e9901b79cc01b88d39c605e0545b4541f2551a2fd785adc15b"}, + {file = "SQLAlchemy-1.3.20-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:fcdb3755a7c355bc29df1b5e6fb8226d5c8b90551d202d69d0076a8a5649d68b"}, + {file = "SQLAlchemy-1.3.20-cp37-cp37m-win32.whl", hash = "sha256:bca4d367a725694dae3dfdc86cf1d1622b9f414e70bd19651f5ac4fb3aa96d61"}, + {file = "SQLAlchemy-1.3.20-cp37-cp37m-win_amd64.whl", hash = "sha256:f605f348f4e6a2ba00acb3399c71d213b92f27f2383fc4abebf7a37368c12142"}, + {file = "SQLAlchemy-1.3.20-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:84f0ac4a09971536b38cc5d515d6add7926a7e13baa25135a1dbb6afa351a376"}, + {file = "SQLAlchemy-1.3.20-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2909dffe5c9a615b7e6c92d1ac2d31e3026dc436440a4f750f4749d114d88ceb"}, + {file = "SQLAlchemy-1.3.20-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:c3ab23ee9674336654bf9cac30eb75ac6acb9150dc4b1391bec533a7a4126471"}, + {file = "SQLAlchemy-1.3.20-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:009e8388d4d551a2107632921320886650b46332f61dc935e70c8bcf37d8e0d6"}, + {file = "SQLAlchemy-1.3.20-cp38-cp38-win32.whl", hash = "sha256:bf53d8dddfc3e53a5bda65f7f4aa40fae306843641e3e8e701c18a5609471edf"}, + {file = "SQLAlchemy-1.3.20-cp38-cp38-win_amd64.whl", hash = "sha256:7c735c7a6db8ee9554a3935e741cf288f7dcbe8706320251eb38c412e6a4281d"}, + {file = "SQLAlchemy-1.3.20-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:4bdbdb8ca577c6c366d15791747c1de6ab14529115a2eb52774240c412a7b403"}, + {file = "SQLAlchemy-1.3.20-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:ce64a44c867d128ab8e675f587aae7f61bd2db836a3c4ba522d884cd7c298a77"}, + {file = "SQLAlchemy-1.3.20-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:be41d5de7a8e241864189b7530ca4aaf56a5204332caa70555c2d96379e18079"}, + {file = "SQLAlchemy-1.3.20-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:1f5f369202912be72fdf9a8f25067a5ece31a2b38507bb869306f173336348da"}, + {file = "SQLAlchemy-1.3.20-cp39-cp39-win32.whl", hash = "sha256:0cca1844ba870e81c03633a99aa3dc62256fb96323431a5dec7d4e503c26372d"}, + {file = "SQLAlchemy-1.3.20-cp39-cp39-win_amd64.whl", hash = "sha256:d05cef4a164b44ffda58200efcb22355350979e000828479971ebca49b82ddb1"}, + {file = "SQLAlchemy-1.3.20.tar.gz", hash = "sha256:d2f25c7f410338d31666d7ddedfa67570900e248b940d186b48461bd4e5569a1"}, +] urllib3 = [ {file = "urllib3-1.26.2-py2.py3-none-any.whl", hash = "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"}, {file = "urllib3-1.26.2.tar.gz", hash = "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08"}, diff --git a/pyproject.toml b/pyproject.toml index 24bf8fa..5bf3623 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,8 @@ license = "MIT" python = "^3.7" click = "^7.1.2" requests = "^2.25.0" +SQLAlchemy = "^1.3.20" +psycopg2 = {version = "^2.8.6", extras = ["postgresql"]} [tool.poetry.dev-dependencies] From e1605113ab24f9fde82233689a53ab13c5d0d736 Mon Sep 17 00:00:00 2001 From: Wesley Batista Date: Mon, 7 Dec 2020 21:17:25 -0300 Subject: [PATCH 3/3] add upload command --- b3_data/main.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/b3_data/main.py b/b3_data/main.py index a0d0bd1..73bdd36 100644 --- a/b3_data/main.py +++ b/b3_data/main.py @@ -2,7 +2,10 @@ import click -@click.group() +PREFIX = 'B3DATA' + + +@click.group(context_settings=dict(auto_envvar_prefix=PREFIX)) def cli(): "Utility for http://b3.com.br datasets" @@ -16,3 +19,15 @@ def download(date, chunk_size): """Downloads quotes data""" from b3_data import download download.download_tickercsv(str(date.date()), chunk_size) + + +@cli.command() +@click.argument("date", + type=click.DateTime(formats=["%Y-%m-%d"]), + default=str(date.today())) +@click.option("--chunk-size", type=int, default=100000) +@click.option('--dsn', required=True, help=f'Set the environment variable {PREFIX}_UPLOAD_DSN') +def upload(date, chunk_size, dsn): + """Uploads quotes data""" + from b3_data import upload + upload.upload(dsn, chunk_size, str(date.date()))