-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuffer.txt
More file actions
225 lines (224 loc) · 11.1 KB
/
buffer.txt
File metadata and controls
225 lines (224 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
5a6,7
> print(asm.abi, asm.abi.linux_x32_abi)
> asm.abi = asm.abi.linux_x32_abi
7a10,11
> from math import prod
> import archinfo
115a120
> print(init_state.arch.memory_endness)
162a168
> regvars_names = {}
163a170
> read_locations = {}
172a180
> regvars_names[name] = v
180,181c188,189
< #print('track reads called', s.inspect.mem_read_address, s.inspect.mem_read_length)
< v = s.memory.load(s.inspect.mem_read_address, s.inspect.mem_read_length, disable_actions=True, inspect=False)
---
> print('track reads called', s.inspect.mem_read_address, s.inspect.mem_read_length)
> v = s.memory.load(s.inspect.mem_read_address, s.inspect.mem_read_length, disable_actions=True, inspect=False, endness=archinfo.Endness.LE)
191a200,204
> if s.inspect.mem_read_address not in read_locations:
> #print('new read')
> read_locations[s.inspect.mem_read_address] = v
> #print(s.inspect.mem_read_address,'has',v)
>
207c220
< print('step'r
---
> print('step')
220a234
> orig_state = state.copy()
228,229c242,249
< esp_var = [k for k, v in register_variables.items() if v=='esp'][0]
< ebp_var = [k for k, v in register_variables.items() if v=='ebp'][0]
---
> esp_var = regvars_names['esp']
> ebp_var = regvars_names['ebp']
> #print('track reads result', read_locations)
> def simplify_val(expr):
> try:
> return state.solver.eval_one(expr)
> except (angr.errors.SimUnsatError, angr.errors.SimValueError):
> return expr
236c256
< if (expr==esp_var).is_true() or (expr==ebp_var).is_true(): # best way to compare BVSs for now
---
> if (expr is esp_var) or (expr is ebp_var): # best way to compare BVSs for now
238,241c258
< try:
< return state.solver.eval_one(expr)
< except (angr.errors.SimUnsatError, angr.errors.SimValueError):
< return expr
---
> return simplify_val(expr)
255a273,274
> combined_lookup = {}
> new_state = {}
257,259c276,292
< combined.append((k,v))
< for k,v in new_memwrites.items():
< combined.append((k,v))
---
> if v is not getattr(orig_state.regs, k):
> #print('change',k)
> combined.append((k,v))
> combined_lookup[k] = v
> new_state[k] = regvars_names[k]
> #print(register_variables, new_regs)
> #print('orig', (orig_state.regs.eax!=new_regs['eax']).is_true())
> for k,v in new_memwrites.items():
> load_val = orig_state.memory.load(k, len(v)//8, endness=archinfo.Endness.LE)
> if (v is not load_val): # memory isn't saved at the beginning because we don't know where it's going to get written to
> #print('change', k)
> combined.append((k,v))
> combined_lookup[k] = v
> new_state[k] = load_val
> for k,v in read_locations.items():
> #print('mem variable',k,v)
> new_state[k] = v
266c299
< # edge u to v means that v depends on u (u is "feeding into" v)
---
> # an edge u to v means that v depends on u (u is "feeding into" v)
270a304
> #print('adding', ident, expr)
274,277c308,347
< if ast.op == 'BVS':
< dg.add_edge(filter(lambda x: x[1], combined)[0], ident)
< print(dg)
< def synthesize(expr):
---
> #print('ident',ident,ast,regvars_names[ident])
> if ast.op == 'BVS': # we also want to know if it depends on itself because sometimes it doesn't
> dg.add_edge(list(filter(lambda x: x[1] is expr, combined))[0][0], ident)
> if type(ident) is not str and ident.symbolic:
> for ast in ident.children_asts():
> if ast.op == 'BVS':
> dg.add_edge(list(filter(lambda x: x[1] is expr, combined))[0][0], ident)
> print('PE BLOCK @', orig_state.regs.eip)
> #print('graph', dg.nodes, dg.edges)
> def find_source(target_expr, only_reg=False):
> sols=[]
> for ident, expr in new_state.items():
> #print(ident,expr,target_expr, target_expr is expr, only_reg, ((not only_reg) or (type(ident) is str)))
> if target_expr is expr and ((not only_reg) or (type(ident) is str)):
> sols.append(ident)
> print(target_expr,only_reg,'sols',sols,new_state)
> return sols
> def reduceasm(x):
> if type(x) is list:
> return x[0]
> else:
> return x
> def target_to_asm(target):
> if type(target) is str: return getattr(asm.registers, target)
> elif type(target) is GeneralPurposeRegister32: return target
> else:
> if target.op=='BVV': return target.v
> elif target.op=='BVS':
> #print(target, find_source(target), new_state)
> t = target_to_asm(find_source(target, True)[0])
> #assert type(t) is str
> assert type(t) is GeneralPurposeRegister32
> #print('returning',t)
> return [t]
> elif target.op=='__add__': return [sum(map(lambda x: reduceasm(target_to_asm(x)), target.args))]
> elif target.op=='__sub__': return [reduceasm(target_to_asm(target.args[0]))-reduceasm(target_to_asm(target.args[1]))]
> elif target.op=='__mul__': return [prod(map(lambda x: reduceasm(target_to_asm(x)), target.args))]
> else: raise Exception('unknown op in target_to_asm '+target.op)
> def synthesize(expr, target):
> code = b''
280,281c350,429
<
< if False:#len(new_code) < segments[i]:
---
> t_asm = target_to_asm(target)
> # FUTUREDO: what happens if the shift is repeated
> print('target', target)
> """if op == 'LShR':
> if hasattr(args[1], 'symbolic') and args[1].symbolic:
> code += synthesize(args[1], cl)
> shift_amt = cl
> else:
> shift_amt = simped
> if hasattr(target, 'symbolic'):
> if target.symbolic:
> code += synthesize(target)
> else:
> code += SHR(target, shift_amt)
> else:
> code += SHR(target, shift_amt)
> elif op == '__lshift__':
> if args[1].symbolic:
> code += synthesize(args[1], cl)
> code += SHL(target, cl).encode()
> else:
> code += SHR(target, simplify_val(args[1]))
> elif op == '__invert__':"""
> if op in ('__add__', '__sub__', '__imul__'):
> if op == '__imul__' and args[1].op=='BVV' and len(find_source(args[0]))>0:
> code += IMUL(target_to_asm(target), target_to_asm(find_source(args[0])[0]), args[1].v).encode()
> new_state[target] = args[0]*args[1]
> # TODO: commutativity?
> elif op == '__imul__' and target in find_source(args[0]) and len(find_source(args[1]))>0:
> code += IMUL(target_to_asm(target), target_to_asm(find_source(args[1])[0])).encode()
> new_state[target] = find_source(args[0])
> else:
> code += synthesize(args[0], target)
> new_state[target] = args[0]
> new_state[target] = expr[0]
> for arg in args[1:] if op != '__imul__' else args[2:]:
> # if we can find source use it
> # TODO: what happens if we need a scratch register? Use XMM instead?
> # Whatever just find source
> # TODO: handle memory to memory
> # so we find a source and add it, easy enough
> code += ({'__add__':ADD,'__sub__':SUB,'__mul__':IMUL})[op](target_to_asm(target), target_to_asm(target), target_to_asm(find_source(arg))).encode()
> new_state[target] *= arg
> elif op == 'BVV':
> code = MOV(target_to_asm(target), expr.v).encode()
> new_state[target] = expr.v
> elif op == 'BVS':
> print('mov',target,expr)
> code = MOV(target_to_asm(target), target_to_asm(find_source(expr)[0])).encode()
> new_state[target] = expr
> else:
> raise Exception('Unimplemented operation')
> return code
>
> # go through every weakly connected component
> #print('components', list(nx.weakly_connected_components(dg)))
> #print('starting', new_state)
> for comp in nx.weakly_connected_components(dg):
> #print(comp)
> # do the nodes that have the highest indegrees (dependencies) first
> sorted_nodes = sorted(dg.nodes, key=lambda x: dg.out_degree(x))
> #print(sorted_nodes)
> #pass
> for node in sorted_nodes:
> # 1. check dependencies, and preserve if needed
> # FUTUREDO: actually implement this
> # FUTUREDO: maybe account for indirect dependencies (i.e. different registers but the same value)?
>
> # 2. generate code
> #print('pe', node)
> target = node
> if type(target) is str:
> target_val = getattr(asm, target)
> else:
> target_val = target
> new_code += synthesize(combined_lookup[node], target_val)
> new_state[node] = combined_lookup[node]
> end_eip = state.solver.eval(state.regs.eip)
> size = end_eip - start_eip # it's linear because otherwise there would be a jump
> if len(new_code) < size:
283,284d430
< end_eip = state.solver.eval(state.regs.eip)
< size = start_eip - end_eip
287,288c433
<
< #print('new eip', result[0].regs.eip)
---
> #print('aaaaaaa', result[0].regs.eip)