Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions AnalyzePDF.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3

"""
Analyzes PDF files by examining their characteristics in order to add some intelligence to determining whether they are malicious or benign.
Expand Down Expand Up @@ -28,7 +28,7 @@
# Version 0.2
# Date: 10-11-2012
# Requirements:
# - Python 2.x
# - Python 3.x
# - YARA (http://plusvic.github.io/yara/)
# - pdfid (http://blog.didierstevens.com/programs/pdf-tools/)
# Optional:
Expand Down Expand Up @@ -61,12 +61,12 @@
try:
import pdfid
except ImportError:
print "[!] PDFiD not installed"
print("[!] PDFiD not installed")
sys.exit()
try:
import yara
except ImportError:
print "[!] Yara not installed"
print("[!] Yara not installed")
sys.exit()

# Initialize the list(s) where PDF attribs will be added to
Expand All @@ -88,7 +88,7 @@

# Verify supplied path exists or die
if not os.path.exists(args['Path']):
print "[!] The supplied path does not exist"
print("[!] The supplied path does not exist")
sys.exit()

# Configure YARA rules
Expand All @@ -98,15 +98,15 @@
rules = '/usr/local/etc/capabilities.yara' # REMnux location

if not os.path.exists(rules):
print "[!] Correct path to YARA rules?"
print("[!] Correct path to YARA rules?")
sys.exit()
else:
try:
r = yara.compile(rules)
if args['move']:
ydir = args['move']
except Exception, msg:
print "[!] YARA compile error: %s" % msg
except Exception as msg:
print("[!] YARA compile error: %s" % msg)
sys.exit()

def main():
Expand All @@ -129,8 +129,8 @@ def sha256(pdf):
data = f.read()
sha256 = hashlib.sha256(data).hexdigest()
f.close()
except Exception, msg:
print msg
except Exception as msg:
print(msg)

return sha256

Expand All @@ -144,11 +144,11 @@ def fileID(pdf):
"""
f = open(pdf,'rb')
s = f.read(1024)
if '\x25\x50\x44\x46' in s:
print "\n" + trailer
print "[+] Analyzing: %s" % pdf
print filler
print "[-] Sha256: %s" % sha256(pdf)
if b'\x25\x50\x44\x46' in s:
print("\n" + trailer)
print("[+] Analyzing: %s" % pdf)
print(filler)
print("[-] Sha256: %s" % sha256(pdf))
info(pdf)
elif os.path.isdir(pdf): pwalk(pdf)
f.close()
Expand All @@ -171,18 +171,18 @@ def info(pdf):
for line in p.stderr:
if re.search('Unterminated hex string|Loop in Pages tree|Illegal digit in hex char in name', line):
counter.append("sketchy")
print "[-] Sketchyness detected"
print("[-] Sketchyness detected")
elif re.search('Unexpected end of file in flate stream|End of file inside array', line):
counter.append("eof")
print "[-] EoF problem"
print("[-] EoF problem")
elif re.search('Couldn\'t find trailer dictionary', line):
counter.append("trailer")
elif re.search('Invalid XRef entry|No valid XRef size in trailer|Invalid XRef entry|Couldn\'t read xref table', line):
counter.append("xref")
print "[-] Invalid XREF"
print("[-] Invalid XREF")
break
except Exception, msg:
print "[!] pdfinfo error: %s" % msg
except Exception as msg:
print("[!] pdfinfo error: %s" % msg)
pass

id(pdf)
Expand All @@ -195,51 +195,51 @@ def id(pdf):
except Exception:
# I've observed some files raising errors with the 'extraData' switch
command = pdfid.PDFiD2String(pdfid.PDFiD(pdf, True, False, False, True), True)
print "[!] PDFiD couldn\'t parse extra data"
print("[!] PDFiD couldn\'t parse extra data")
extra = False

for line in command.split('\n'):
count = re.split(r'[\s]+', line)
if "PDF Header" in line and not re.match('%PDF-1\.\d', count[3]):
counter.append("header")
print "[-] Invalid version number : \"%s\"" % count[3]
print("[-] Invalid version number : \"%s\"" % count[3])
elif "/Page " in line:
page_counter.append(count[2])
elif "/Pages " in line:
page_counter.append(count[2])
elif "/JS " in line and not re.match('0', count[2]):
counter.append("js")
print "[-] JavaScript count.......: %s" % count[2]
print("[-] JavaScript count.......: %s" % count[2])
if count[2] > "1":
counter.append("mucho_javascript")
print "\t[*] That\'s a lot of js ..."
print("\t[*] That\'s a lot of js ...")
elif "/AcroForm " in line and not re.match('0', count[2]):
counter.append("acroform")
print "[-] AcroForm...............: %s" % count[2]
print("[-] AcroForm...............: %s" % count[2])
elif "/AA " in line and not re.match('0', count[2]):
counter.append("aa")
print "[-] Additional Action......: %s" % count[2]
print("[-] Additional Action......: %s" % count[2])
elif "/OpenAction " in line and not re.match('0', count[2]):
counter.append("oa")
print "[-] Open Action............: %s" % count[2]
print("[-] Open Action............: %s" % count[2])
elif "/Launch " in line and not re.match('0', count[2]):
counter.append("launch")
print "[-] Launch Action..........: %s" % count[2]
print("[-] Launch Action..........: %s" % count[2])
elif "/EmbeddedFiles " in line and not re.match('0', count[2]):
counter.append("embed")
print "[-] Embedded File..........: %s" % count[2]
print("[-] Embedded File..........: %s" % count[2])
#elif "trailer" in line and not re.match('0|1', count[2]):
# print "[-] Trailer count..........: %s" % count[2]
# print "\t[*] Multiple versions detected"
elif "Total entropy:" in line:
tentropy = count[3]
print "[-] Total Entropy..........: %7s" % count[3]
print("[-] Total Entropy..........: %7s" % count[3])
elif "Entropy inside streams:" in line:
ientropy = count[4]
print "[-] Entropy inside streams : %7s" % count[4]
print("[-] Entropy inside streams : %7s" % count[4])
elif "Entropy outside streams:" in line:
oentropy = count[4]
print "[-] Entropy outside streams: %7s" % count[4]
print("[-] Entropy outside streams: %7s" % count[4])
"""
Entropy levels:
0 = orderly, 8 = random
Expand Down Expand Up @@ -269,55 +269,55 @@ def id(pdf):
if togo > 2:
if oe_long + 2 > te_long:
counter.append("entropy")
print "\t[*] Entropy of outside stream is questionable:"
print "\t[-] Outside (%s) +2 (%s) > Total (%s)" % (oe_long,oe_long +2,te_long)
print("\t[*] Entropy of outside stream is questionable:")
print("\t[-] Outside (%s) +2 (%s) > Total (%s)" % (oe_long,oe_long +2,te_long))
elif oe_long > te_long:
counter.append("entropy")
print "\t[*] Entropy of outside stream is questionable:"
print "\t[-] Outside (%s) > Total (%s)" % (oe_long,te_long)
print("\t[*] Entropy of outside stream is questionable:")
print("\t[-] Outside (%s) > Total (%s)" % (oe_long,te_long))
if str(te_short) <= "2.0" or str(ie_short) <= "2.0":
counter.append("entropy")
print "\t[*] LOW entropy detected:"
print "\t[-] Total (%s) or Inside (%s) <= 2.0" % (te_short,ie_short)
print("\t[*] LOW entropy detected:")
print("\t[-] Total (%s) or Inside (%s) <= 2.0" % (te_short,ie_short))

# Process the /Page(s) results here just to make sure they were both read
if re.match('0', page_counter[0]) and re.match('0', page_counter[1]):
counter.append("page")
print "[-] Page count suspicious:"
print "\t[*] Both /Page (%s) and /Pages (%s) = 0" % (page_counter[0],page_counter[1])
print("[-] Page count suspicious:")
print("\t[*] Both /Page (%s) and /Pages (%s) = 0" % (page_counter[0],page_counter[1]))
elif re.match('0', page_counter[0]) and not re.match('0', page_counter[1]):
counter.append("page")
print "[-] Page count suspicious, no individual pages defined:"
print "\t[*] /Page = (%s) , /Pages = (%s)" % (page_counter[0],page_counter[1])
print("[-] Page count suspicious, no individual pages defined:")
print("\t[*] /Page = (%s) , /Pages = (%s)" % (page_counter[0],page_counter[1]))
elif re.match('1$', page_counter[0]):
counter.append("page")
print "[-] (1) page PDF"
print("[-] (1) page PDF")

yarascan(pdf)

def yarascan(pdf):
try:
ymatch = r.match(pdf)
if len(ymatch):
print "[-] YARA hit(s): %s" % ymatch
print("[-] YARA hit(s): %s" % ymatch)
for rule in ymatch:
meta = rule.meta
for key, value in meta.iteritems():
for key, value in meta.items():
# If the YARA rule has a weight in its metadata then parse that for later calculation
if "weight" in key:
yscore.append(value)
if not ydir == False:
print "[-] Moving malicious file to:",ydir
print("[-] Moving malicious file to:",ydir)
# This will move the file if _any_ YARA rule triggers...which might trick you if the
# rule that triggers on it doesn't have a weight or isn't displayed in the output
if not os.path.exists(ydir):
os.makedirs(ydir)
try:
shutil.move(pdf, ydir)
except Exception, msg:
except Exception as msg:
continue
except Exception, msg:
print msg
except Exception as msg:
print(msg)

eval(counter)

Expand All @@ -328,9 +328,9 @@ def eval(counter):

Rating system: 0 (benign), >=2 (sketchy), >=3 (medium), >=5 (high)
"""
print filler
print(filler)
ytotal = sum(yscore)
print "[-] Total YARA score.......: %s" % ytotal
print("[-] Total YARA score.......: %s" % ytotal)
sev = 0

# Below are various combinations used to add some intelligence and help evaluate if a file is malicious or benign.
Expand Down Expand Up @@ -372,14 +372,14 @@ def eval(counter):
if "page" in counter and "header" in counter: sev += 1
if "header" in counter and "embed" in counter: sev += 1

print "[-] Total severity score...: %s" % sev
print("[-] Total severity score...: %s" % sev)
sev = (ytotal + sev)
print "[-] Overall score..........: %s" % sev
print("[-] Overall score..........: %s" % sev)

if sev >= 5: print trailer + "\n[!] HIGH probability of being malicious"
elif sev >= 3: print trailer + "\n[!] MEDIUM probability of being malicious"
elif sev >= 2: print trailer + "\n[!] Heuristically sketchy"
elif sev >= 0: print trailer + "\n[-] Scanning didn't determine anything warranting suspicion"
if sev >= 5: print(trailer + "\n[!] HIGH probability of being malicious")
elif sev >= 3: print(trailer + "\n[!] MEDIUM probability of being malicious")
elif sev >= 2: print(trailer + "\n[!] Heuristically sketchy")
elif sev >= 0: print(trailer + "\n[-] Scanning didn't determine anything warranting suspicion")

# Clear out the scores to start fresh for the next analysis
del counter[:]
Expand Down
2 changes: 1 addition & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Requirements
------------
* pdfid
* pdfinfo
* yara
* yara-python

Usage
-----
Expand Down
4 changes: 2 additions & 2 deletions pdf_rules.yara
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ rule multiple_filtering : PDF

strings:
$magic = { 25 50 44 46 }
$attrib = /\/Filter.*?(\/ASCIIHexDecode\W+|\/LZWDecode\W+|\/ASCII85Decode\W+|\/FlateDecode\W+|\/RunLengthDecode){2}/
$attrib = /\/Filter.*(\/ASCIIHexDecode\W+|\/LZWDecode\W+|\/ASCII85Decode\W+|\/FlateDecode\W+|\/RunLengthDecode){2}/
// left out: /CCITTFaxDecode, JBIG2Decode, DCTDecode, JPXDecode, Crypt

condition:
Expand Down Expand Up @@ -394,7 +394,7 @@ rule invalid_xref_numbers : PDF
strings:
$magic = { 25 50 44 46 }
$reg0 = /xref\r?\n?.*\r?\n?.*65535\sf/
$reg1 = /endstream.*?\r?\n?endobj.*?\r?\n?startxref/
$reg1 = /endstream.*\r?\n?endobj.*\r?\n?startxref/
condition:
$magic at 0 and not $reg0 and not $reg1
}
Expand Down