diff --git a/examples/download_contracts_etherscan_io.py b/examples/download_contracts_etherscan_io.py index 9ddb8cb..67c2922 100644 --- a/examples/download_contracts_etherscan_io.py +++ b/examples/download_contracts_etherscan_io.py @@ -2,6 +2,7 @@ # -*- coding: UTF-8 -*- # github.com/tintinweb # +import os """ HACKy - non productive - script to download contracts from etherscan.io with throtteling. @@ -19,27 +20,29 @@ class EtherScanIoApi(object): """ def __init__(self, proxies={}): - self.session = UserAgent(baseurl="https://etherscan.io", retry=5, retrydelay=8, proxies=proxies) + self.session = UserAgent( + baseurl="https://etherscan.io", retry=5, retrydelay=8, proxies=proxies) def get_contracts(self, start=0, end=None): page = start while not end or page <= end: resp = self.session.get("/contractsVerified/%d" % page).text - page, lastpage = re.findall(r'Page (\d+) of (\d+)', resp)[0] - page, lastpage = int(page),int(lastpage) + page, lastpage = re.findall( + r'Page <.*>(\d+) of <.*>(\d+)', resp)[0] + page, lastpage = int(page), int(lastpage) if not end: end = lastpage rows = self._parse_tbodies(resp)[0] # only use first tbody for col in rows: - contract = {'address': self._extract_text_from_html(col[0]).split(" ",1)[0], - 'name': self._extract_text_from_html(col[1]), - 'compiler': self._extract_text_from_html(col[2]), - 'balance': self._extract_text_from_html(col[3]), - 'txcount': int(self._extract_text_from_html(col[4])), - 'settings': self._extract_text_from_html(col[5]), - 'date': self._extract_text_from_html(col[6]), - } + contract = {'address': self._extract_text_from_html(col[0]).split(" ", 1)[0], + 'name': self._extract_text_from_html(col[1]), + 'compiler': self._extract_text_from_html(col[2]), + 'balance': self._extract_text_from_html(col[3]), + 'txcount': int(self._extract_text_from_html(col[4])), + 'settings': self._extract_text_from_html(col[5]), + 'date': self._extract_text_from_html(col[6]), + } yield contract page += 1 @@ -47,7 +50,7 @@ def get_contract_source(self, address): import time e = None for _ in range(20): - resp = self.session.get("/address/%s"%address).text + resp = self.session.get("/address/%s" % address).text if "You have reached your maximum request limit for this resource. Please try again later" in resp: print("[[THROTTELING]]") time.sleep(1+2.5*_) @@ -55,10 +58,10 @@ def get_contract_source(self, address): try: print("=======================================================") print(address) - #print(resp) - resp = resp.split("
",1)[1]
- resp = resp.split("", 1)[1]
+ resp = resp.split("