From 3eeb923b45dd90a8ec602712a1ba1a863ac4d9c7 Mon Sep 17 00:00:00 2001
From: xxxx-oooo
Date: Sat, 7 Dec 2024 23:10:44 +0800
Subject: [PATCH 1/3] python packages upgraded, py 3.11 supported, and bugs fixed

---
 .flake8                  |  13 ++
 .gitignore               |   4 +
 .pre-commit-config.yaml  |  11 ++
 pyproject.toml           |  17 ++
 requirements.txt         |  11 +-
 src/__init__.py          |   1 -
 src/ethereum_datafarm.py | 350 ++++++++++++++++++++++++---------------
 src/run.py               |   8 +-
 src/utils.py             | 257 +++++++++++++++++-----------
 9 files changed, 430 insertions(+), 242 deletions(-)
 create mode 100644 .flake8
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 pyproject.toml

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..6c4eb04
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,13 @@
+[flake8]
+ignore = E203, E231, E266, E501, E721, E722, W503, F403, F405, F401, W605
+exclude =
+    .git,
+    __pycache__/,
+    migrations/,
+    static,
+    templates,
+    build,
+    dist
+max-line-length = 79
+max-complexity = 18
+select = B,C,E,F,W,T4,B9
diff --git a/.gitignore b/.gitignore
index 7be88f0..af25b9b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -142,3 +142,7 @@ cython_debug/
 
 .vscode/
 bin/
+
+abis/
+data/
+tmp/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..1cefd5a
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,11 @@
+repos:
+- repo: https://github.com/psf/black
+  rev: 22.10.0
+  hooks:
+  - id: black
+    language_version: python3.11
+- repo: https://github.com/PyCQA/flake8
+  rev: "7.1.0"
+  hooks:
+  - id: flake8
+    args: ["--max-line-length", "79"]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b352850
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,17 @@
+[tool.black]
+line-length = 79
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | requirements
+  | build
+  | dist
+)/
+'''
diff --git a/requirements.txt b/requirements.txt
index e5d1f27..cf76608 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
-requests==2.28.1
-termcolor==1.1.0
-pysha3==1.0.2
-web3==5.30.0
-pandas==1.4.3
+requests==2.32.3
+termcolor==2.5.0
+#pysha3==1.0.2
+pycryptodome==3.21.0
+web3==7.6.0
+pandas==2.2.3
diff --git a/src/__init__.py b/src/__init__.py
index 8b13789..e69de29 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1 +0,0 @@
-
diff --git a/src/ethereum_datafarm.py b/src/ethereum_datafarm.py
index 49030c8..b432f1d 100644
--- a/src/ethereum_datafarm.py
+++ b/src/ethereum_datafarm.py
@@ -1,93 +1,106 @@
-from utils import *
-import requests
 import json
-from web3 import Web3
-from multiprocessing import Process, cpu_count, connection
+import random
+from multiprocessing import Process, connection, cpu_count
+
 import pandas as pd
+import requests
 from eth_abi import decode as abi_decode
-import random
+from web3 import Web3
 
-SLOW_DOWN = 0 # seconds to wait between api calls
-STORAGE_THRESHOLD = 9e3
+from utils import *
 
+SLOW_DOWN = 0  # seconds to wait between api calls
+STORAGE_THRESHOLD = 9e3
 
-class Farm():
+
+class Farm:
     def __init__(self):
         print_start()
-        log("".join(["="]*50)+ "\nStart new farm instance...")
+        log("".join(["="] * 50) + "\nStart new farm instance...")
         self.contracts = list()
-        
+
     def load_contracts(self):
         try:
             for c in load_all():
                 self.contracts.append(Contract(*c))
-        except:
-            msg = colored(f"\nLoading contracts interrupted", "red", attrs=["bold"])
-            raise ContractLoadingInterrupted(msg)
-        
+        except Exception:
+            msg = colored(
+                "\nLoading contracts interrupted", "red", attrs=["bold"]
+            )
+            raise ContractLoadingInterrupted(msg)
+
     def farm(self):
-        
+
         print(INFO_MSG.format("Start farming..."))
         try:
             if CORES > 1:
-                cpus = CORES-1 # CORES form utils.py
+                cpus = CORES - 1  # CORES from utils.py
             else:
                 cpus = 1
-            #self.contracts = random.shuffle(self.contracts)
-            trs = int(len(self.contracts)/cpus)
-            tranches={}
+            # self.contracts = random.shuffle(self.contracts)
+            trs = int(len(self.contracts) / cpus)
+            tranches = {}
             for i in range(cpus):
-                if i == cpus-1:
-                    tranches[i] = self.contracts[trs*i:]
-                else:
-                    tranches[i] = self.contracts[trs*i:trs*(i+1)]
+                if i == cpus - 1:
+                    tranches[i] = self.contracts[trs * i :]
+                else:
+                    tranches[i] = self.contracts[trs * i : trs * (i + 1)]
 
             processes = []
             for i in range(cpus):
-                p = Process(target = self.split_tasks, args=tuple([tranches[i]]))
+                p = Process(target=self.split_tasks, args=tuple([tranches[i]]))
                 p.start()
                 processes.append(p)
 
             connection.wait(p.sentinel for p in processes)
-        
+
         except KeyboardInterrupt:
             msg = colored("Safely terminating...\n", "green", attrs=["bold"])
            print(INFO_MSG.format(msg))
             if len(processes) > 0:
                 for p in processes:
-                    p.terminate()
-    
-    def split_tasks(self, c):
-        for contract in c:
-            msg = colored(f"Start parsing {contract}", "green", attrs=["bold"])
-            print(INFO_MSG.format(msg))
-            log(f"Start parsing {contract}")
-            contract.scrape()
-    
+                    p.terminate()
 
-class Contract():
+    def split_tasks(self, c):
+        for contract in c:
+            msg = colored(f"Start parsing {contract}", "green", attrs=["bold"])
+            print(INFO_MSG.format(msg))
+            log(f"Start parsing {contract}")
+            contract.scrape()
+
+
+class Contract:
     def __init__(self, address, name, method, startBlock, chunksize):
-        self.address = Web3.toChecksumAddress(address)
+        self.address = Web3.to_checksum_address(address)
         self.name = name.lower()
         self.method = method
         self.simpleMethod = method.split("(")[0].lower()
         self.topic0 = get_method_from_canonical_expression(self.method)
-        
-        newStartBlock, newStartTx = check_custom_start(self.name, self.simpleMethod)
+
+        newStartBlock, newStartTx = check_custom_start(
+            self.name, self.simpleMethod
+        )
 
         if newStartBlock:
             self.startBlock = newStartBlock
             self.startTx = newStartTx
-            
-            msg = "{} ({}) {}".format(self.address, self.name, colored("starting at last known location", "green"))
-            msg2 = "{} ({}) blockheight set to {:,.0f}".format(self.address, self.name, self.startBlock)
+
+            msg = "{} ({}) {}".format(
+                self.address,
+                self.name,
+                colored("starting at last known location", "green"),
+            )
+            msg2 = "{} ({}) blockheight set to {:,.0f}".format(
+                self.address, self.name, self.startBlock
+            )
             print(INFO_MSG.format(msg))
             print(INFO_MSG.format(msg2))
-            
+
             if newStartTx == "None":
                 self.run = True
                 self.startTx = None
-                
+
             else:
-                msg3 = "{} ({}) starting after tx {}".format(self.address, self.name, self.startTx[:-56]+"...")
+                msg3 = "{} ({}) starting after tx {}".format(
+                    self.address, self.name, self.startTx[:-56] + "..."
+                )
                 print(INFO_MSG.format(msg3))
                 self.run = False
 
@@ -95,135 +108,162 @@ def __init__(self, address, name, method, startBlock, chunksize):
             self.startBlock = int(startBlock)
             self.startTx = None
             self.run = True
-        
+
         self.fromblock = self.startBlock
         self.chunksize = int(chunksize)
-        
+
         self.printName = get_print_name(self.name)
         self.printMethod = get_print_method(self.method)
-        self.storageLocation = f"../data/{self.name}/{self.simpleMethod}_" + "{}.csv"
-        
+        self.storageLocation = (
+            f"../data/{self.name}/{self.simpleMethod}_" + "{}.csv"
+        )
+
         self.abi = get_abi(self)
         eventInfo = get_event_info(self)
         self.evINames, self.evNames, self.evITypes, self.evTypes = eventInfo
-        
+
         self.columns = BASIC_HEADER + self.evINames + self.evNames
-        
+
         self.CACHE = pd.DataFrame(columns=self.columns)
         self.LATEST_BLOCK = latest_block()
         self.timeSinceLatestBlock = datetime.now()
         self.avgNrOfPages = [1.5]
-        
+
         self.fileCounter = set_up_directory(self.name, self.simpleMethod)
 
     def scrape(self):
         while self.fromblock < self.LATEST_BLOCK:
-            
+
             self.try_adapting_chunksize()
-            
+
             self.toblock = self.fromblock + self.chunksize
-            
+
             if self.toblock > self.LATEST_BLOCK:
-                self.toblock = self.LATEST_BLOCK
-                
-            results = [0]*1001
+                self.toblock = self.LATEST_BLOCK
+
+            results = [0] * 1001
             page = 1
             while len(results) >= 1000:
-                
-                payload = build_payload(self.fromblock, self.toblock, self.address, self.topic0, page)
+
+                payload = build_payload(
+                    self.fromblock,
+                    self.toblock,
+                    self.address,
+                    self.topic0,
+                    page,
+                )
                 results = send_payload(payload)
                 time.sleep(SLOW_DOWN)
-                
+
                 success = True
                 if results == "no records found":
                     self.log_nothing_found()
-                    self.avgNrOfPages.append(1)
+                    self.avgNrOfPages.append(1)
                     self.avgNrOfPages = self.avgNrOfPages[-10:]
                     results = [0]
                     continue
-                
+
                 if results == "page limit reached":
                     msg = "decreasing chunk size and trying again..."
                    print(WARN_MSG.format(msg))
-                    self.fromblock, self.startTx = check_custom_start(self.name, self.simpleMethod)
+                    self.fromblock, self.startTx = check_custom_start(
+                        self.name, self.simpleMethod
+                    )
                     self.CACHE = pd.DataFrame(columns=self.columns)
                     self.run = False
-                    self.chunksize = int(self.chunksize/10)
-                    if self.chunksize < 1: self.chunksize = 1
-                    self.log_chunk_size(self.chunksize*10, "decreasing")
+                    self.chunksize = int(self.chunksize / 10)
+                    if self.chunksize < 1:
+                        self.chunksize = 1
+                    self.log_chunk_size(self.chunksize * 10, "decreasing")
                     self.avgNrOfPages.append(1.5)
                     self.avgNrOfPages = self.avgNrOfPages[-10:]
                     results = [0]
                     success = False
                     continue
-                
+
                 self.parse_results(results)
-                
+
                 self.log_progress(len(results), page)
                 page += 1
-            
+
             if not success:
                 continue
-            
+
             if page - 1 >= 1:
-                self.avgNrOfPages.append(page - 1)
+                self.avgNrOfPages.append(page - 1)
                 self.avgNrOfPages = self.avgNrOfPages[-10:]
             else:
-                self.avgNrOfPages.append(1)
+                self.avgNrOfPages.append(1)
                 self.avgNrOfPages = self.avgNrOfPages[-10:]
-            
+
             # Update latest block every 600 seconds
-            if (datetime.now()-self.timeSinceLatestBlock).total_seconds() > 6e2:
+            if (
+                datetime.now() - self.timeSinceLatestBlock
+            ).total_seconds() > 6e2:
                 msg = f"updating latest block for {self.name}"
                 print(INFO_MSG.format(msg))
                 self.LATEST_BLOCK = latest_block()
                 self.timeSinceLatestBlock = datetime.now()
-            
-            self.fromblock = self.toblock + 1
-        
+
+            self.fromblock = self.toblock + 1
+
         if len(self.CACHE) > 0 and self.run:
             self.log_storage()
-            dump_cache_to_disk(self.CACHE, self.storageLocation.format(self.fileCounter), self.name, self.simpleMethod)
+            dump_cache_to_disk(
+                self.CACHE,
+                self.storageLocation.format(self.fileCounter),
+                self.name,
+                self.simpleMethod,
+            )
             self.CACHE = pd.DataFrame(columns=self.columns)
             self.fileCounter += 1
-        
-        content = "{}-{}".format(self.fromblock,"None")
-        with open(f"../tmp/{self.name}_{self.simpleMethod}_last_stored_tx.txt", "w") as f:
+
+        content = "{}-{}".format(self.fromblock, "None")
+        with open(
+            f"../tmp/{self.name}_{self.simpleMethod}_last_stored_tx.txt", "w"
+        ) as f:
             f.write(content)
-        
+
         self.log_end()
-        
+
     def parse_results(self, results):
         for r in results:
             indexed_topics = []
             non_indexed_topics = []
-            
-            timeStamp = from_hex(r['timeStamp'])
-            blockNumber = from_hex(r['blockNumber'])
-            transactionHash = r['transactionHash']
-            transactionIndex = from_hex(r['transactionIndex'])
-            logIndex = from_hex(r['logIndex'])
-            gasPrice = from_hex(r['gasPrice'])
-            gasUsed = from_hex(r['gasUsed'])
-            
+
+            timeStamp = from_hex(r["timeStamp"])
+            blockNumber = from_hex(r["blockNumber"])
+            transactionHash = r["transactionHash"]
+            transactionIndex = from_hex(r["transactionIndex"])
+            logIndex = from_hex(r["logIndex"])
+            gasPrice = from_hex(r["gasPrice"])
+            gasUsed = from_hex(r["gasUsed"])
+
             for index, t in enumerate(r["topics"][1:]):
                 t = abi_decode([self.evITypes[index]], bytes.fromhex(t[2:]))
                 indexed_topics.append(t[0])
-            
+
             data = r["data"][2:]
-            non_indexed_topics = list(abi_decode(self.evTypes, bytes.fromhex(data)))
+            non_indexed_topics = list(
+                abi_decode(self.evTypes, bytes.fromhex(data))
+            )
+
+            eventInfo = [
+                self.address,
+                blockNumber,
+                timeStamp,
+                transactionHash,
+                transactionIndex,
+                gasPrice,
+                gasUsed,
+                logIndex,
+            ]
 
-            
-            eventInfo = [self.address, blockNumber, timeStamp,
-                         transactionHash, transactionIndex,
-                         gasPrice, gasUsed, logIndex]
-            
             for indexedTopic in indexed_topics:
                 eventInfo += [indexedTopic]
-            
+
             for nonIndexedTopic in non_indexed_topics:
                 eventInfo += [nonIndexedTopic]
 
@@ -231,78 +271,112 @@ def parse_results(self, results):
         if len(self.CACHE) >= STORAGE_THRESHOLD and self.run:
             self.log_storage()
-            dump_cache_to_disk(self.CACHE, self.storageLocation.format(self.fileCounter), self.name, self.simpleMethod)
+            dump_cache_to_disk(
+                self.CACHE,
+                self.storageLocation.format(self.fileCounter),
+                self.name,
+                self.simpleMethod,
+            )
             self.CACHE = pd.DataFrame(columns=self.columns)
             self.fileCounter += 1
-        
+
         if self.startTx == transactionHash:
             if not self.run:
                 msg = f"contract @ {self.address} ({self.name}) last known tx found; start parsing..."
                 print(INFO_MSG.format(msg))
             self.run = True
             self.CACHE = pd.DataFrame(columns=self.columns)
-            
-    
+
     def make_row(self, *args):
         self.CACHE.loc[len(self.CACHE)] = args[0]
-    
+
     def try_adapting_chunksize(self):
         op = None
-        if sum(self.avgNrOfPages)/len(self.avgNrOfPages) > 3:
+        if sum(self.avgNrOfPages) / len(self.avgNrOfPages) > 3:
            old_cs = self.chunksize
-            self.chunksize = int(self.chunksize/5)
+            self.chunksize = int(self.chunksize / 5)
             op = "decreasing"
-            self.avgNrOfPages = [3]*10
-            
-            
-        elif sum(self.avgNrOfPages)/len(self.avgNrOfPages) <= 1:
+            self.avgNrOfPages = [3] * 10
+
+        elif sum(self.avgNrOfPages) / len(self.avgNrOfPages) <= 1:
             old_cs = self.chunksize
-            if self.chunksize < 5: factor = 1.5
-            else: factor = 1.2
-            self.chunksize = int(self.chunksize*factor)
+            if self.chunksize < 5:
+                factor = 1.5
+            else:
+                factor = 1.2
+            self.chunksize = int(self.chunksize * factor)
             self.avgNrOfPages.append(1.5)
             self.avgNrOfPages = self.avgNrOfPages[-10:]
             op = "increasing"
-        
+
         if self.chunksize <= 1:
             self.chunksize = 2
         if self.chunksize > 100000:
             self.chunksize = 100000
         if op:
             self.log_chunk_size(old_cs, op)
-    
+
     def log_progress(self, len_result, page):
-        _printName = self.printName[:16]+"..." if len(self.printName) >= 16 else self.printName + " "
-        msg = "parsing {0:<19} | ".format(_printName[:19]) \
-        + "{:>10,.0f}-{:>10,.0f} | ".format(self.fromblock, self.toblock) \
-        + colored("{:>4.0f}/1000".format(len_result), "green") + f" | Page {page}" \
-        + " | cs {:>7,.0f} | cache {:>6,.0f}".format(self.chunksize, len(self.CACHE))
-        
+        _printName = (
+            self.printName[:16] + "..."
+            if len(self.printName) >= 16
+            else self.printName + " "
+        )
+        msg = (
+            "parsing {0:<19} | ".format(_printName[:19])
+            + "{:>10,.0f}-{:>10,.0f} | ".format(self.fromblock, self.toblock)
+            + colored("{:>4.0f}/1000".format(len_result), "green")
+            + f" | Page {page}"
+            + " | cs {:>7,.0f} | cache {:>6,.0f}".format(
+                self.chunksize, len(self.CACHE)
+            )
+        )
+
         print(INFO_MSG.format(msg))
-    
+
     def log_chunk_size(self, old_size, op):
-        avg = sum(self.avgNrOfPages)/len(self.avgNrOfPages)
-        msg = "{} chunk size for {}".format(op, self.printName[:17]+"...:") \
-        + " {:,.0f} --> {:,.0f} with avg. pages of {:.2f}".format(old_size, self.chunksize, avg)
+        avg = sum(self.avgNrOfPages) / len(self.avgNrOfPages)
+        msg = "{} chunk size for {}".format(
+            op, self.printName[:17] + "...:"
+        ) + " {:,.0f} --> {:,.0f} with avg. pages of {:.2f}".format(
+            old_size, self.chunksize, avg
+        )
         print(INFO_MSG.format(msg))
-    
+
     def log_nothing_found(self):
-        _printName = self.printName[:14]+"..." if len(self.printName) >= 14 else self.printName + " "
-        msg = colored("no result", "red") + " {0:<17} | ".format(colored(_printName[:17]), "red") \
-        + "{:>10,.0f}-{:>10,.0f} | {:>9}".format(self.fromblock, self.toblock, "cs " + "{:,.0f}".format(self.chunksize))
+        _printName = (
+            self.printName[:14] + "..."
+            if len(self.printName) >= 14
+            else self.printName + " "
+        )
+        msg = (
+            colored("no result", "red")
+            + " {0:<17} | ".format(colored(_printName[:17], "red"))
+            + "{:>10,.0f}-{:>10,.0f} | {:>9}".format(
+                self.fromblock,
+                self.toblock,
+                "cs " + "{:,.0f}".format(self.chunksize),
+            )
+        )
         print(INFO_MSG.format(msg))
-    
+
     def log_storage(self):
-        msg = colored("storing ", "green") + f"{self.printName} with " \
-        "{:,.0f} entries @ ".format(len(self.CACHE)) \
-        + colored(f"{self.storageLocation.format(self.fileCounter)}", "green")
+        msg = colored(
+            "storing ", "green"
+        ) + f"{self.printName} with " "{:,.0f} entries @ ".format(
+            len(self.CACHE)
+        ) + colored(
+            f"{self.storageLocation.format(self.fileCounter)}", "green"
+        )
         print(INFO_MSG.format(msg))
-        log(f"storing {self.printName} with {len(self.CACHE)} entries @ {self.storageLocation.format(self.fileCounter)}")
-    
+        log(
+            f"storing {self.printName} with {len(self.CACHE)} entries @ {self.storageLocation.format(self.fileCounter)}"
+        )
+
     def log_end(self):
         msg = colored(f"terminating {self.printName}", "green", attrs=["bold"])
         print(INFO_MSG.format(msg))
         log(f"terminating {self.printName}")
-    
+
     def __repr__(self):
         return f""
diff --git a/src/run.py b/src/run.py
index 0ba26e7..3a4b9c0 100644
--- a/src/run.py
+++ b/src/run.py
@@ -1,12 +1,12 @@
 from ethereum_datafarm import *
 
-if __name__=="__main__":
-    
+if __name__ == "__main__":
+
     # Initialize Farm
     farm = Farm()
-    
+
     # Load Contracts
     farm.load_contracts()
-    
+
     # Start parsing
     farm.farm()
diff --git a/src/utils.py b/src/utils.py
index 9626f97..fb5c8e0 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -1,20 +1,35 @@
-import requests
+import argparse
 import json
-import re
 import os
-from termcolor import colored
-from datetime import datetime
-import numpy as np
+import re
 import time
-import sha3
-import argparse
+from datetime import datetime
 from multiprocessing import cpu_count
 
+import numpy as np
+import requests
 
-parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=60))
-parser.add_argument('-loc', '--location', help="output location - default: ../data", default="./data")
-parser.add_argument('-c', '--cores', help="cores available", default=str(cpu_count()))
-parser.add_argument('-log', '--log', help="activate logging", action='store_true')
+# import sha3
+from Crypto.Hash import keccak
+from termcolor import colored
+
+parser = argparse.ArgumentParser(
+    formatter_class=lambda prog: argparse.HelpFormatter(
+        prog, max_help_position=60
+    )
+)
+parser.add_argument(
+    "-loc",
+    "--location",
+    help="output location - default: ../data",
+    default="./data",
+)
+parser.add_argument(
+    "-c", "--cores", help="cores available", default=str(cpu_count())
+)
+parser.add_argument(
+    "-log", "--log", help="activate logging", action="store_true"
+)
 
 _args = parser.parse_args()
 
@@ -25,32 +40,45 @@
 
 with open("../key/key.txt", "r") as file:
     KEY = file.read()
-    
+
 if not os.path.isdir("../abis"):
     os.mkdir("../abis")
 
 if not os.path.isdir(f"../{LOCATION}"):
     os.mkdir(f"../{LOCATION}")
-    
+
 if not os.path.isdir("../tmp"):
     os.mkdir("../tmp")
 
-PAYLOAD = "https://api.etherscan.io/api" \
-    + "?module=logs" \
-    + "&action=getLogs" \
-    + "&fromBlock={}" \
-    + "&toBlock={}" \
-    + "&address={}" \
-    + "&topic0={}" \
-    + "&page={}" \
-    + "&offset=1000" \
-    + "&apikey={}".format(KEY)
+PAYLOAD = (
+    "https://api.etherscan.io/api"
+    + "?module=logs"
+    + "&action=getLogs"
+    + "&fromBlock={}"
+    + "&toBlock={}"
+    + "&address={}"
+    + "&topic0={}"
+    + "&page={}"
"&offset=1000" + + "&apikey={}".format(KEY) +) + +BASIC_HEADER = [ + "address", + "blocknumber", + "timestamp", + "txhash", + "txindex", + "gas_price", + "gas_used", + "logindex", +] -BASIC_HEADER = ['address','blocknumber','timestamp','txhash','txindex','gas_price','gas_used','logindex'] def build_payload(*args): return PAYLOAD.format(*args) + def send_payload(payload): try: _res = requests.get(payload) @@ -61,28 +89,28 @@ def send_payload(payload): msg = "result window is too large" print(WARN_MSG.format(msg)) return "page limit reached" - + if int(res["status"]) != 1: - raise - - except: + raise + + except Exception: msg = "payload failed (fetching event)" print(WARN_MSG.format(msg)) log(msg) try: - print(res) + print(res) print(res["message"].lower()) - except: + except Exception: pass print(colored("Waiting for 10 seconds", "red")) time.sleep(10) return send_payload(payload) - + return res["result"] - - + + def dump_cache_to_disk(df, filename, name, method): - df = df.loc[:,~df.columns.duplicated()].copy() + df = df.loc[:, ~df.columns.duplicated()].copy() for c in df: if df[c].dtype == "float64": df[c] = df[c].apply(lambda x: int(x)) @@ -90,16 +118,16 @@ def dump_cache_to_disk(df, filename, name, method): if type(df[c][0]) == float: try: df[c] = df[c].apply(lambda x: int(x)) - except: + except Exception: pass last_row = df.iloc[-1] - content = "{}-{}".format(last_row["blocknumber"],last_row["txhash"]) + content = "{}-{}".format(last_row["blocknumber"], last_row["txhash"]) with open(f"../tmp/{name}_{method}_last_stored_tx.txt", "w") as f: f.write(content) df.to_csv(filename, index=None) - + def check_custom_start(name, method): if os.path.isfile(f"../tmp/{name}_{method}_last_stored_tx.txt"): with open(f"../tmp/{name}_{method}_last_stored_tx.txt", "r") as file: @@ -108,62 +136,74 @@ def check_custom_start(name, method): return int(startblock), starttx else: return [None, None] - def get_method_from_canonical_expression(method): - return '0x' + sha3.keccak_256(method.encode('utf-8')).hexdigest() + keccak_hash = keccak.new(digest_bits=256) + keccak_hash.update(method.encode("utf-8")) + return f"0x{keccak_hash.hexdigest()}" + # return "0x" + sha3.keccak_256(method.encode("utf-8")).hexdigest() + def get_print_name(name): if len(name) > 25: - return name[0:21]+"..." + return name[0:21] + "..." else: return name - + + def get_print_method(method): if len(method) > 25: - return method[0:21]+"..." + return method[0:21] + "..." 
     else:
         return method
-    
+
+
 def from_hex(string):
-    if str(string) == '0x':
+    if str(string) == "0x":
         return 0
-    return int(str(string),16)
+    return int(str(string), 16)
+
 
 def convert_to(bytes32inHex, toType):
     if toType == "address":
-        return '0x' + bytes32inHex[-40:]
+        return "0x" + bytes32inHex[-40:]
     if "int" in toType:
         return from_hex(bytes32inHex)
     else:
         return bytes32inHex
-    
+
+
 def latest_block():
-    payload = "https://api.etherscan.io/api" \
-    + "?module=block" \
-    + "&action=getblocknobytime" \
-    + f"&timestamp={round(datetime.timestamp(datetime.now()))}" \
-    + "&closest=before" \
-    + "&apikey={}".format(KEY)
-    
-    time.sleep(np.random.randint(1,3))
+    payload = (
+        "https://api.etherscan.io/api"
+        + "?module=block"
+        + "&action=getblocknobytime"
+        + f"&timestamp={round(datetime.timestamp(datetime.now()))}"
+        + "&closest=before"
+        + "&apikey={}".format(KEY)
+    )
+
+    time.sleep(np.random.randint(1, 3))
     try:
         res = requests.get(payload)
-        return int(json.loads(res.content)["result"]) - 6 # 6 blocks to make sure no re-orgs
+        return (
+            int(json.loads(res.content)["result"]) - 6
+        )  # 6 blocks to make sure no re-orgs
     except:
-        time.sleep(np.random.randint(1,10))
+        time.sleep(np.random.randint(1, 10))
         msg = "payload failed (latest block)"
         print(WARN_MSG.format(msg))
         log(msg)
         return latest_block()
-    
+
+
 def get_event_info(contract):
-    inames=[]
-    names=[]
-    itypes=[]
-    types=[]
-    
+    inames = []
+    names = []
+    itypes = []
+    types = []
+
     for i in contract.abi:
         if i["type"] != "event" or contract.simpleMethod != i["name"].lower():
             continue
@@ -175,32 +215,32 @@ def get_event_info(contract):
             names.append(args["name"])
             types.append(args["type"])
     return inames, names, itypes, types
-    
+
 
 def verify_abi(abi, address, name, simpleMethod):
     success = True
-    
+
     if "admin" in abi and "proxy" in abi:
         msg = f"contract @ {address} ({name}) contains the word ``admin``"
         print(WARN_MSG.format(msg))
         # success = False -- let user know but dont force interruptions
-    
+
     if f'"name":"{simpleMethod}"'.lower() not in abi.lower():
         print(f'"name":"{simpleMethod}"'.lower())
         msg = f"contract @ {address} ({name}) does not contain method {simpleMethod}; maybe proxy contract?"
         print(WARN_MSG.format(msg))
         success = False
-    
+
     if "not verified" in abi:
         msg = f"contract @ {address} ({name}) not verified;"
         print(WARN_MSG.format(msg))
         success = False
-    
+
     if not abi.endswith("]"):
         msg = f"contract @ {address} ({name}) abi probably broken;"
         print(WARN_MSG.format(msg))
-        success = False  
-    
+        success = False
+
     return success
 
 
@@ -208,28 +248,36 @@ def get_abi(contract):
     try:
         with open(f"../abis/{contract.name}.abi", "r") as file:
             abi = file.read()
-        success = verify_abi(abi, contract.address, contract.name, contract.simpleMethod)
-        
+        success = verify_abi(
+            abi, contract.address, contract.name, contract.simpleMethod
+        )
+
         if success:
-            msg = "abi found locally | " + colored("contract", "green") + f" @ {contract.address} ({contract.name})"
+            msg = (
+                "abi found locally | "
+                + colored("contract", "green")
+                + f" @ {contract.address} ({contract.name})"
+            )
             print(INFO_MSG.format(msg))
         else:
             msg = f"contract @ {contract.address} ({contract.name}) failed to retrieve abi "
-            msg2 = f"contract @ {contract.address} ({contract.name}) add abi manually to the contract's abi file"            
+            msg2 = f"contract @ {contract.address} ({contract.name}) add abi manually to the contract's abi file"
             print(WARN_MSG.format(msg))
             print(WARN_MSG.format(msg2))
             input("press any key to continue")
             return get_abi(contract)
-    
+
     except:
         esc = f"https://api.etherscan.io/api?module=contract&action=getabi&address={contract.address}&apikey={KEY}"
         res = requests.get(esc)
         time.sleep(1)
         abi = json.loads(res.content)["result"]
-        
-        success = verify_abi(abi, contract.address, contract.name, contract.simpleMethod)
-        
-        if success: 
+
+        success = verify_abi(
+            abi, contract.address, contract.name, contract.simpleMethod
+        )
+
+        if success:
             msg = f"contract @ {contract.address} ({contract.name}) requesting abi "
             print(INFO_MSG.format(msg))
             with open(f"../abis/{contract.name}.abi", "w") as file:
@@ -247,22 +295,22 @@ def get_abi(contract):
             print(WARN_MSG.format(msg2))
             input("press any key to continue")
             return get_abi(contract)
-        
-    
     abi = eval(abi.replace("false", "False").replace("true", "True"))
     return abi
-    
-def load_all(contracts=[],start=True,config_location="../contracts.csv"):
+
+
+def load_all(contracts=[], start=True, config_location="../contracts.csv"):
     with open("../contracts.csv", "r") as file:
-        file = file.read().replace(" ", "").split("\n")  
+        file = file.read().replace(" ", "").split("\n")
     for f in file:
         if f == "":
             continue
         if f.startswith("#"):
             continue
         yield tuple(re.split("\,(?=.*\()|\,(?!.*\))", f))
-    
+
+
 def set_up_directory(name, simpleMethod):
     fileCounter = 0
     if not os.path.isdir(f"../{LOCATION}/{name}"):
@@ -271,36 +319,57 @@ def set_up_directory(name, simpleMethod):
 
     elif len(os.listdir(f"../{LOCATION}/{name}")) > 0:
         print(WARN_MSG.format(f"non-empty directory ../{LOCATION}/{name}"))
-        lastFlNr = max(list(map(int, re.findall("[0-9]+",str(os.listdir(f"../{LOCATION}/{name}")))))+[0])
+        lastFlNr = max(
+            list(
+                map(
+                    int,
+                    re.findall(
+                        "[0-9]+", str(os.listdir(f"../{LOCATION}/{name}"))
+                    ),
+                )
+            )
+            + [0]
+        )
         fileCounter = lastFlNr + 1
-        print(INFO_MSG.format(f"fileCounter set to {fileCounter} for {name} and method {simpleMethod}"))
+        print(
+            INFO_MSG.format(
+                f"fileCounter set to {fileCounter} for {name} and method {simpleMethod}"
+            )
+        )
     return fileCounter
-    
+
+
 def curtime():
-    return "["+datetime.strftime(datetime.now(), "%m-%d|%H:%M:%S")+"]"
-    
+    return "[" + datetime.strftime(datetime.now(), "%m-%d|%H:%M:%S") + "]"
+
+
 INFO_MSG = colored("[INFO]", "green") + " {}".format(curtime()) + " {}"
 WARN_MSG = colored("[WARN]", "red") + " {}".format(curtime()) + " {}"
= colored("[WARN]", "red") + " {}".format(curtime()) + " {}" + def log(msg): if LOGGING: with open("./log.txt", "a") as file: - file.write(msg+"\n") + file.write(msg + "\n") + def print_start(): c = """ - ███████╗████████╗██╗ ██╗ ██████╗ █████╗ ████████╗ █████╗ ███████╗ █████╗ ██████╗ ███╗ ███╗ ██████╗ ██████╗ + ███████╗████████╗██╗ ██╗ ██████╗ █████╗ ████████╗ █████╗ ███████╗ █████╗ ██████╗ ███╗ ███╗ ██████╗ ██████╗ ██╔════╝╚══██╔══╝██║ ██║ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗██╔════╝██╔══██╗██╔══██╗████╗ ████║ ╚════██╗ ██╔═████╗ █████╗ ██║ ███████║ ██║ ██║███████║ ██║ ███████║█████╗ ███████║██████╔╝██╔████╔██║ █████╔╝ ██║██╔██║ ██╔══╝ ██║ ██╔══██║ ██║ ██║██╔══██║ ██║ ██╔══██║██╔══╝ ██╔══██║██╔══██╗██║╚██╔╝██║ ██╔═══╝ ████╔╝██║ ███████╗ ██║ ██║ ██║ ██████╔╝██║ ██║ ██║ ██║ ██║██║ ██║ ██║██║ ██║██║ ╚═╝ ██║ ███████╗██╗╚██████╔╝ ╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═════╝ """ - print(colored(c, "green", attrs=["dark","bold"])) - print(colored("© Toni Wahrstaetter 2023\n", "green", attrs=["dark","bold"])) + print(colored(c, "green", attrs=["dark", "bold"])) + print( + colored("© Toni Wahrstaetter 2023\n", "green", attrs=["dark", "bold"]) + ) print("Starting datafarm...") print(f"Storage location: ../{LOCATION}/") + class ContractLoadingInterrupted(Exception): pass From c115efd6114cc03ba33ac20e02493178f278a8eb Mon Sep 17 00:00:00 2001 From: xxxx-oooo Date: Mon, 9 Dec 2024 15:26:01 +0800 Subject: [PATCH 2/3] add dockerfile --- Dockerfile | 16 ++++++++++++++++ contracts.csv | 5 +---- contracts.csv.bk | 4 ++++ src/utils.py | 10 ++++------ 4 files changed, 25 insertions(+), 10 deletions(-) create mode 100644 Dockerfile create mode 100644 contracts.csv.bk diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2be5365 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11.10-slim-bullseye +ENV PYTHONUNBUFFERED 1 + +COPY . 
+WORKDIR /opt/ethereum-datafarm
+
+RUN pip install -U pip
+RUN pip install setuptools_scm
+RUN pip install -r requirements.txt
+
+# cleanup
+RUN rm -rf /var/lib/apt/lists/*
+RUN pip cache purge
+
+# add user
+RUN useradd -s /sbin/nologin -u 1001 -d /opt/ethereum-datafarm datafarm
diff --git a/contracts.csv b/contracts.csv
index 9e3194a..a0be937 100644
--- a/contracts.csv
+++ b/contracts.csv
@@ -1,4 +1 @@
-0x283Af0B28c62C092C9727F1Ee09c02CA627EB7F5,ens_registrar,NameRegistered(string,bytes32,address,uint256,uint256),14363664,5000
-0xdAC17F958D2ee523a2206206994597C13D831ec7,tether,Transfer(address,address,uint256),14363664,20
-0x6B175474E89094C44Da98b954EedeAC495271d0F,dai,Transfer(address,address,uint256),14363664,500
-0x5ef30b9986345249bc32d8928B7ee64DE9435E39,makerdao,NewCdp(address,address,uint256),14363664,5000
+0x55d398326f99059ff775485246999027b3197955,BEP20USDT,Transfer(address,address,uint256),176416,20
diff --git a/contracts.csv.bk b/contracts.csv.bk
new file mode 100644
index 0000000..9e3194a
--- /dev/null
+++ b/contracts.csv.bk
@@ -0,0 +1,4 @@
+0x283Af0B28c62C092C9727F1Ee09c02CA627EB7F5,ens_registrar,NameRegistered(string,bytes32,address,uint256,uint256),14363664,5000
+0xdAC17F958D2ee523a2206206994597C13D831ec7,tether,Transfer(address,address,uint256),14363664,20
+0x6B175474E89094C44Da98b954EedeAC495271d0F,dai,Transfer(address,address,uint256),14363664,500
+0x5ef30b9986345249bc32d8928B7ee64DE9435E39,makerdao,NewCdp(address,address,uint256),14363664,5000
diff --git a/src/utils.py b/src/utils.py
index fb5c8e0..7086e28 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -37,9 +37,7 @@
 LOCATION = vars(_args)["location"]
 CORES = int(vars(_args)["cores"])
 LOGGING = bool(vars(_args)["log"])
-
-with open("../key/key.txt", "r") as file:
-    KEY = file.read()
+KEY = os.getenv("ETHSCAN_KEY")
 
 if not os.path.isdir("../abis"):
     os.mkdir("../abis")
@@ -51,7 +49,7 @@
     os.mkdir("../tmp")
 
 PAYLOAD = (
-    "https://api.etherscan.io/api"
+    "https://api.bscscan.com/api"
     + "?module=logs"
     + "&action=getLogs"
     + "&fromBlock={}"
@@ -176,7 +174,7 @@ def convert_to(bytes32inHex, toType):
 
 def latest_block():
     payload = (
-        "https://api.etherscan.io/api"
+        "https://api.bscscan.com/api"
         + "?module=block"
         + "&action=getblocknobytime"
         + f"&timestamp={round(datetime.timestamp(datetime.now()))}"
@@ -268,7 +266,7 @@ def get_abi(contract):
         return get_abi(contract)
 
     except:
-        esc = f"https://api.etherscan.io/api?module=contract&action=getabi&address={contract.address}&apikey={KEY}"
+        esc = f"https://api.bscscan.com/api?module=contract&action=getabi&address={contract.address}&apikey={KEY}"
         res = requests.get(esc)
         time.sleep(1)
         abi = json.loads(res.content)["result"]

From 4e8259b1c89b4da264fd04d6d7208b1a27234d36 Mon Sep 17 00:00:00 2001
From: xxxx-oooo
Date: Mon, 9 Dec 2024 20:21:44 +0800
Subject: [PATCH 3/3] update dockerfile

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 2be5365..6d7d83f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,3 +14,5 @@
 
 # add user
 RUN useradd -s /sbin/nologin -u 1001 -d /opt/ethereum-datafarm datafarm
+
+VOLUME "data/"
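
Usage sketch (not part of the patches): patch 2 switches the API key from
../key/key.txt to the ETHSCAN_KEY environment variable, and the code resolves
../abis, ../tmp, and the data directory relative to src/, so a container built
from this Dockerfile could be run roughly as below. The image tag and the host
data path are made-up examples, not anything defined by the patches:

    # build the image from the repository root
    docker build -t ethereum-datafarm .

    # supply the bscscan API key via ETHSCAN_KEY, mount the data dir,
    # and start run.py from src/ so the ../ paths resolve correctly
    docker run \
        -e ETHSCAN_KEY=<your-bscscan-api-key> \
        -v "$(pwd)/data:/opt/ethereum-datafarm/data" \
        -w /opt/ethereum-datafarm/src \
        ethereum-datafarm python run.py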