Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions batch_deobfuscator/batch_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,30 +336,56 @@ def get_commands(self, logical_line):
if line_is_comment(logical_line):
yield logical_line.strip()
return
state = "init"

stack = ["init"]
counter = 0
start_command = 0
var_start = None

for char in logical_line:
# print(f"C:{char}, S:{state}")
state = stack[-1]

if state == "init":
if char == '"': # quote is on
state = "str_s"
if char == '"':
stack.append("str_s")
elif char == "%":
stack.append("var_s")
var_start = counter
elif char == "^":
state = "escape"
stack.append("escape")
elif char == "&" and logical_line[counter - 1] == ">":
# Usually an output redirection, we want to keep it on the same line
pass
elif char == "&" or char == "|":
elif char in ("&", "|"):
cmd = logical_line[start_command:counter].strip()
if cmd != "":
for part in self.get_commands_special_statement(cmd):
yield part
start_command = counter + 1

elif state == "str_s":
if char == '"':
state = "init"
stack.pop()
elif char == "%":
stack.append("var_s")
var_start = counter

elif state == "var_s":
if char == "%":
# Inspect variable contents for something that affects parsing
stack.pop() # get rid of var_s
if var_start < counter: # at least one character between percents?
value = self.get_value(logical_line[var_start:counter + 1])
if value.count('"') == 1:
if stack[-1] != "str_s":
stack.append("str_s")
else: # end of quoted data reached
stack.pop()
elif value == "^":
stack.append("escape")

elif state == "escape":
state = "init"
stack.pop()

counter += 1

Expand Down Expand Up @@ -951,7 +977,7 @@ def percent_tilde(self, argument):
return value if value else "script.bat"

# pushdown automata
def normalize_command(self, command):
def normalize_command(self, command, replace_by_space=True):
if line_is_comment(command):
return command

Expand All @@ -965,7 +991,8 @@ def normalize_command(self, command):
if char == '"': # quote is on
state = "str_s"
normalized_com += char
elif char == "," or char == ";": # or char == "\t": EDIT: How about we keep those tabs?
# or char == "\t": EDIT: How about we keep those tabs?
elif (char == "," or char == ";") and replace_by_space:
# commas (",") are replaced by spaces, unless they are part of a string in doublequotes
# semicolons (";") are replaced by spaces, unless they are part of a string in doublequotes
# tabs are replaced by a single space
Expand Down Expand Up @@ -1012,9 +1039,16 @@ def normalize_command(self, command):
normalized_com = normalized_com[:variable_start]
if len(normalized_com) == 0:
traits["start_with_var"] = True
normalized_com += self.normalize_command(value)
normalized_com += self.normalize_command(value, replace_by_space=False)
traits["var_used"] += 1
state = stack.pop()
prev = stack.pop()
if value.count('"') == 1:
if prev != "str_s":
state = "str_s"
else: # end of quoted data reached
state = "init"
else:
state = prev
elif char == "%": # Two % in a row
normalized_com += char
state = stack.pop()
Expand Down
21 changes: 21 additions & 0 deletions tests/test_full_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,24 @@ def test_concat_logical_lines():
rb'curl -X GET --fail -H "Accept: application/octet-stream" '
rb"http://server.org/data?accept=data >>met\resultat\output.log"
)


def test_no_substituted_quote_command_splitting():
    """Check a full-script run where the quotes come from a variable.

    The script first stores a lone double quote in ``QUO`` and then wraps
    the argument of a second ``set`` in ``%QUO%``.  The second output line
    is expected to be one quoted ``set`` command with ``|`` and ``;``
    left untouched.
    """
    batch_source = rb"""set QUO="
set %QUO%DATA=bla | foo;bar%QUO%"""
    interpreter = BatchDeobfuscator()

    with tempfile.TemporaryDirectory() as work_dir:
        # The input script lives in the same directory the output is written to.
        with tempfile.NamedTemporaryFile(dir=work_dir) as script_file:
            script_file.write(batch_source)
            script_file.flush()  # make the bytes visible to analyze()
            output_name, _ = interpreter.analyze(script_file.name, work_dir)

        output_path = os.path.join(work_dir, output_name)
        with open(output_path, "rb") as handle:
            output_lines = handle.read().split(b"\r\n")

    assert len(output_lines) >= 2
    first_line, second_line = output_lines[0], output_lines[1]
    assert first_line == b'set QUO="'
    # 1. Must not split at |
    # 2. Must not replace ; by space
    assert second_line == b'set "DATA=bla | foo;bar"'
35 changes: 35 additions & 0 deletions tests/test_unittests.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ def test_simple_set_a():
# ('set EXP=43^"|', "echo *%EXP%*", []),
# ('set EXP=43"^|', "echo *%EXP%*", 'echo *43"^|*'),
# ('set EXP=43"^^|', "echo *%EXP%*", 'echo *43"^^|*'),
# Comma in value
('set EXP=4,3', "echo *%EXP%*", "echo *4,3*"),
('set "EXP=4,3"', "echo *%EXP%*", "echo *4,3*"),
# Getting into really weird stuff
("set EXP=4=3", "echo *%EXP%*", "echo *4=3*"),
('set ""EXP=43"', 'echo *%"EXP%*', "echo *43*"),
Expand Down Expand Up @@ -755,3 +758,35 @@ def test_keep_quotes_on_set():
cmd = 'set "ab= ""'
res = deobfuscator.normalize_command(cmd)
assert res == cmd

@staticmethod
@pytest.mark.parametrize(
    "cmd, command_list",
    [
        # Opening and closing quote both come from %QUO%.
        (
            'set %QUO%DATA=bla | foo%QUO% & bar',
            ['set %QUO%DATA=bla | foo%QUO%', 'bar'],
        ),
        # Literal opening quote, closing quote from %QUO%.
        (
            'set "DATA=bla | foo%QUO% & bar',
            ['set "DATA=bla | foo%QUO%', 'bar'],
        ),
        # Opening quote from %QUO%, literal closing quote.
        (
            'set %QUO%DATA=bla | foo" & bar',
            ['set %QUO%DATA=bla | foo"', 'bar'],
        ),
    ],
)
def test_substituted_quotes_command_splitting(cmd, command_list):
    """Check command splitting when quotes are supplied by a variable.

    ``QUO`` is defined as a single double quote before splitting.  Each
    case expects the ``|`` between the quotes to stay inside the first
    command, while the ``&`` after the closing quote splits off ``bar``.
    """
    interpreter = BatchDeobfuscator()
    interpreter.interpret_command('set QUO="')

    # get_commands is consumed fully so the pieces can be compared as a list.
    split_commands = [*interpreter.get_commands(cmd)]

    assert split_commands == command_list

@staticmethod
def test_substituted_escape_command_splitting():
    """Check that an escape supplied by a variable protects the next ``|``.

    ``ESCP`` is defined through ``set ESCP=^^`` (presumably leaving a single
    caret as its value; confirm against ``interpret_command``).  The line
    using ``%ESCP%`` directly before ``|`` must come back as one command.
    """
    interpreter = BatchDeobfuscator()
    interpreter.interpret_command('set ESCP=^^')

    line = 'echo a %ESCP%| b'
    split_commands = [*interpreter.get_commands(line)]

    # The whole line is expected back unchanged, as the only command.
    assert split_commands == [line]