-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathduplicateScriptsApprox.py
More file actions
116 lines (95 loc) · 3.87 KB
/
duplicateScriptsApprox.py
File metadata and controls
116 lines (95 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import sys
import zipfile
from difflib import SequenceMatcher
from itertools import combinations
# Minimum length, in blocks, that a shared run must reach to be reported
# as a duplicate.
N_BLOCKS = 6

# Opcodes of the control blocks (loops and conditionals).
# NOTE(review): not referenced anywhere in the visible part of this file.
LOOP_BLOCKS = [
    "control_repeat",
    "control_forever",
    "control_if",
    "control_if_else",
    "control_repeat_until",
]
def find_dups(blocks, min_blocks=None):
    """
    Find runs of blocks that are shared between pairs of scripts.

    Every unordered pair of scripts is compared once.  For each pair, the
    longest common contiguous run of opcodes is reported when it is at
    least ``min_blocks`` long.

    Args:
        blocks: list of scripts, each script being a list of block opcodes.
        min_blocks: minimum length of a shared run for it to be reported.
            Defaults to the module-level ``N_BLOCKS``.

    Returns:
        A list with one entry (a list of opcodes) per pair of scripts that
        share a long enough run.  A run shared by several pairs is listed
        once per pair, so the same run can appear more than once.
    """
    if min_blocks is None:
        min_blocks = N_BLOCKS

    duplicates = []
    for first, second in combinations(blocks, 2):
        # autojunk must be disabled: with the default heuristic, opcodes
        # that occur often in a script of 200+ blocks are dropped from the
        # match index, so genuine duplicates in long scripts are missed.
        matcher = SequenceMatcher(None, first, second, autojunk=False)
        match = matcher.find_longest_match(0, len(first), 0, len(second))
        if match.size >= min_blocks:
            duplicates.append(first[match.a:match.a + match.size])
    return duplicates
class DuplicateScripts:
    """
    Analyzer of duplicate scripts in Scratch 3.0 (sb3) projects.

    Call ``analyze(filename)`` to detect duplicates, then
    ``finalize(filename)`` to write them to disk and get a text summary.
    """

    def __init__(self):
        self.list_duplicate = []
        self.blocks_dup = {}
        # Results of the last analyze() call.  Defined here so finalize()
        # does not fail with AttributeError when called before analyze().
        self.intra_dups_list = []
        self.project_dups_list = []

    @staticmethod
    def _load_project(filename):
        """Return the parsed project.json of a .json, .zip or .sb3 file."""
        if filename.endswith((".zip", ".sb3")):
            # An .sb3 file is a zip archive holding a project.json entry.
            with zipfile.ZipFile(filename, "r") as archive:
                with archive.open("project.json") as project_file:
                    return json.loads(project_file.read().decode("utf-8"))
        if filename.endswith(".json"):
            with open(filename, encoding="utf-8") as project_file:
                return json.load(project_file)
        raise ValueError(
            "Unsupported project file (expected .json, .zip or .sb3): "
            "{!r}".format(filename)
        )

    @staticmethod
    def _collect_scripts(json_project):
        """Return a dict mapping each sprite name to its list of scripts.

        A script is a list of opcodes.  Blocks are taken in the order in
        which they appear in the sprite's "blocks" dictionary: a top-level
        block starts a new script and every following non-top-level block
        is appended to it.
        """
        scripts_dict = {}
        for target in json_project["targets"]:
            scripts = []
            current = []
            for block in target["blocks"].values():
                # Entries that are not dicts carry no opcode and are
                # skipped (presumably Scratch's compact array form for
                # stand-alone variable/list reporters -- TODO confirm).
                if not isinstance(block, dict):
                    continue
                if block["topLevel"] and current:
                    scripts.append(current)
                    current = []
                current.append(block["opcode"])
            # Do not record an empty script for sprites without blocks.
            if current:
                scripts.append(current)
            scripts_dict[target["name"]] = scripts
        return scripts_dict

    def analyze(self, filename):
        """
        Detect duplicated runs of blocks in the given project.

        Fills ``self.intra_dups_list`` (duplicates between scripts of the
        same sprite) and ``self.project_dups_list`` (duplicates between any
        two scripts of the project, whatever their sprite).

        Args:
            filename: path to a project.json file, or to a .zip/.sb3
                archive that contains one.

        Raises:
            ValueError: if the file extension is not supported.
        """
        scripts_dict = self._collect_scripts(self._load_project(filename))

        # Intra-sprite.
        # NOTE(review): only the first duplicate found in each sprite is
        # kept, so this list has at most one entry per sprite.
        self.intra_dups_list = []
        for scripts in scripts_dict.values():
            dups = find_dups(scripts)
            if dups:
                self.intra_dups_list.append(dups[0])

        # Project-wide: compare every script against every other one.
        all_scripts = []
        for scripts in scripts_dict.values():
            all_scripts.extend(scripts)
        self.project_dups_list = find_dups(all_scripts)

    def finalize(self, filename):
        """
        Write the detected duplicates to disk and return a summary.

        Two JSON files are written next to the project file,
        ``<base>-sprite.json`` and ``<base>-project.json``, where ``<base>``
        is ``filename`` without its trailing ``.json`` suffix (if any).

        Args:
            filename: path of the analyzed project file.

        Returns:
            A two-line text report with the number of duplicates found.
        """
        # Strip only a trailing ".json": str.replace would also mangle any
        # ".json" occurring elsewhere in the path (e.g. a directory name).
        suffix = ".json"
        base = filename[:-len(suffix)] if filename.endswith(suffix) else filename

        with open(base + "-sprite.json", "w") as outfile:
            json.dump(self.intra_dups_list, outfile)
        with open(base + "-project.json", "w") as outfile:
            json.dump(self.project_dups_list, outfile)

        result = "{} intra-sprite duplicate scripts found\n".format(
            len(self.intra_dups_list)
        )
        result += "{} project-wide duplicate scripts found\n".format(
            len(self.project_dups_list)
        )
        return result
def main(filename):
    """
    Entry point of the 'duplicateScripts' extension.

    Prints a short header, analyzes the project stored in ``filename`` and
    prints the summary returned by the analyzer.
    """
    print("Looking for duplicate scripts in", filename)
    print("Minimum number of blocks:", N_BLOCKS)
    print()

    analyzer = DuplicateScripts()
    analyzer.analyze(filename)
    report = analyzer.finalize(filename)
    print(report)
if __name__ == "__main__":
    # The project file is a required argument: print a usage message
    # instead of failing with a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <project.json|project.zip>".format(sys.argv[0]))
    main(sys.argv[1])