Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 34 additions & 30 deletions examples/download_contracts_etherscan_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: UTF-8 -*-
# github.com/tintinweb
#
import os
"""

HACKy - non-productive - script to download contracts from etherscan.io with throttling.
Expand All @@ -19,46 +20,48 @@ class EtherScanIoApi(object):
"""

def __init__(self, proxies=None):
    """Create the scraping session for etherscan.io.

    Args:
        proxies: optional proxy configuration dict handed through to
            ``UserAgent``; ``None`` means "no proxies" (empty dict).
    """
    # NOTE: the original signature used a mutable dict literal as the
    # default (`proxies={}`); use the None-sentinel idiom instead so a
    # single dict instance is never shared across calls.
    self.session = UserAgent(
        baseurl="https://etherscan.io", retry=5, retrydelay=8,
        proxies=proxies if proxies is not None else {})

def get_contracts(self, start=0, end=None):
    """Yield metadata dicts for verified contracts listed on
    etherscan.io's /contractsVerified pages.

    Args:
        start: first listing-page number to fetch.
        end: last page number (inclusive); when ``None`` it is taken
            from the pagination info found on the first fetched page.

    Yields:
        dict with keys ``address``, ``name``, ``compiler``,
        ``balance``, ``txcount``, ``settings``, ``date`` — all plain
        text extracted from the first table body of each page.
    """
    page = start

    while not end or page <= end:
        resp = self.session.get("/contractsVerified/%d" % page).text
        # Pagination marker looks like: "Page <b>1</b> of <b>123</b>";
        # the tag name varies, so match any tag around the numbers.
        # (The diff artifact duplicating the old <b>-anchored regex
        # has been removed — it raised IndexError on current HTML.)
        page, lastpage = re.findall(
            r'Page <.*>(\d+)</.*> of <.*>(\d+)</.*>', resp)[0]
        page, lastpage = int(page), int(lastpage)
        if not end:
            end = lastpage
        rows = self._parse_tbodies(resp)[0]  # only use first tbody
        for col in rows:
            # Column layout of the verified-contracts table; address
            # cell may carry trailing text, keep only the first token.
            contract = {'address': self._extract_text_from_html(col[0]).split(" ", 1)[0],
                        'name': self._extract_text_from_html(col[1]),
                        'compiler': self._extract_text_from_html(col[2]),
                        'balance': self._extract_text_from_html(col[3]),
                        'txcount': int(self._extract_text_from_html(col[4])),
                        'settings': self._extract_text_from_html(col[5]),
                        'date': self._extract_text_from_html(col[6]),
                        }
            yield contract
        page += 1

def get_contract_source(self, address):
import time
e = None
for _ in range(20):
resp = self.session.get("/address/%s"%address).text
resp = self.session.get("/address/%s" % address).text
if "You have reached your maximum request limit for this resource. Please try again later" in resp:
print("[[THROTTELING]]")
time.sleep(1+2.5*_)
continue
try:
print("=======================================================")
print(address)
#print(resp)
resp = resp.split("</span><pre class='js-sourcecopyarea' id='editor' style='margin-top: 5px;'>",1)[1]
resp = resp.split("</pre><br>",1)[0]
return resp.replace("&lt;", "<").replace("&gt;", ">").replace("&le;","<=").replace("&ge;",">=").replace("&amp;","&").replace("&vert;","|")
resp = resp.split(
"</div><pre class='js-sourcecopyarea' id='editor' style='margin-top: 5px;'>", 1)[1]
resp = resp.split("</pre><br>", 1)[0]
return resp.replace("&lt;", "<").replace("&gt;", ">").replace("&le;", "<=").replace("&ge;", ">=").replace("&amp;", "&").replace("&vert;", "|")
except Exception as e:
print(e)
time.sleep(1 + 2.5 * _)
Expand All @@ -84,7 +87,8 @@ def _get_pageable_data(self, path, start=0, length=10):
for san_k in set(keys).intersection(set(("account", "blocknumber", "type", "direction"))):
item[san_k] = self._extract_text_from_html(item[san_k])
for san_k in set(keys).intersection(("parenthash", "from", "to", "address")):
item[san_k] = self._extract_hexstr_from_html_attrib(item[san_k])
item[san_k] = self._extract_hexstr_from_html_attrib(
item[san_k])
return resp

def _parse_tbodies(self, data):
Expand All @@ -98,23 +102,22 @@ def _parse_tbodies(self, data):
return tbodies


import os

if __name__=="__main__":
if __name__ == "__main__":
output_directory = "./output"
overwrite = False
amount = 100000
amount = 1

e = EtherScanIoApi()
for nr,c in enumerate(e.get_contracts()):
with open("contracts.json",'a') as f:
f.write("%s\n"%c)
for nr, c in enumerate(e.get_contracts()):
with open("contracts.json", 'a') as f:
f.write("%s\n" % c)
print("got contract: %s" % c)
dst = os.path.join(output_directory, c["address"].replace("0x", "")[:2].lower()) # index by 1st byte
dst = os.path.join(output_directory, c["address"].replace(
"0x", "")[:2].lower()) # index by 1st byte
if not os.path.isdir(dst):
os.makedirs(dst)
fpath = os.path.join(dst, "%s_%s.sol" % (
c["address"].replace("0x", ""), str(c['name']).replace("\\", "_").replace("/", "_")))
c["address"].replace("0x", ""), str(c['name']).replace("\\", "_").replace("/", "_")))
if not overwrite and os.path.exists(fpath):
print(
"[%d/%d] skipping, already exists --> %s (%-20s) -> %s" % (nr, amount, c["address"], c["name"], fpath))
Expand All @@ -127,13 +130,14 @@ def _parse_tbodies(self, data):
except Exception as e:
continue


with open(fpath, "wb") as f:
f.write(bytes(source, "utf8"))

print("[%d/%d] dumped --> %s (%-20s) -> %s" % (nr, amount, c["address"], c["name"], fpath))
print("[%d/%d] dumped --> %s (%-20s) -> %s" %
(nr, amount, c["address"], c["name"], fpath))

nr += 1
if nr >= amount:
print("[%d/%d] finished. maximum amount of contracts to download reached." % (nr, amount))
break
print(
"[%d/%d] finished. maximum amount of contracts to download reached." % (nr, amount))
break