diff --git a/AnalyzePDF.py b/AnalyzePDF.py index dd7f194..4313e70 100755 --- a/AnalyzePDF.py +++ b/AnalyzePDF.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Analyzes PDF files by looking at their characteristics in order to add some intelligence into the determination of them being malicious or benign. @@ -28,7 +28,7 @@ # Version 0.2 # Date: 10-11-2012 # Requirements: -# - Python 2.x +# - Python 3.x # - YARA (http://plusvic.github.io/yara/) # - pdfid (http://blog.didierstevens.com/programs/pdf-tools/) # Optional: @@ -61,12 +61,12 @@ try: import pdfid except ImportError: - print "[!] PDFiD not installed" + print("[!] PDFiD not installed") sys.exit() try: import yara except ImportError: - print "[!] Yara not installed" + print("[!] Yara not installed") sys.exit() # Initialize the list(s) where PDF attribs will be added to @@ -88,7 +88,7 @@ # Verify supplied path exists or die if not os.path.exists(args['Path']): - print "[!] The supplied path does not exist" + print("[!] The supplied path does not exist") sys.exit() # Configure YARA rules @@ -98,15 +98,15 @@ rules = '/usr/local/etc/capabilities.yara' # REMnux location if not os.path.exists(rules): - print "[!] Correct path to YARA rules?" + print("[!] Correct path to YARA rules?") sys.exit() else: try: r = yara.compile(rules) if args['move']: ydir = args['move'] - except Exception, msg: - print "[!] YARA compile error: %s" % msg + except Exception as msg: + print("[!] YARA compile error: %s" % msg) sys.exit() def main(): @@ -129,8 +129,8 @@ def sha256(pdf): data = f.read() sha256 = hashlib.sha256(data).hexdigest() f.close() - except Exception, msg: - print msg + except Exception as msg: + print(msg) return sha256 @@ -144,11 +144,11 @@ def fileID(pdf): """ f = open(pdf,'rb') s = f.read(1024) - if '\x25\x50\x44\x46' in s: - print "\n" + trailer - print "[+] Analyzing: %s" % pdf - print filler - print "[-] Sha256: %s" % sha256(pdf) + if b'\x25\x50\x44\x46' in s: + print("\n" + trailer) + print("[+] Analyzing: %s" % pdf) + print(filler) + print("[-] Sha256: %s" % sha256(pdf)) info(pdf) elif os.path.isdir(pdf): pwalk(pdf) f.close() @@ -171,18 +171,18 @@ def info(pdf): for line in p.stderr: if re.search('Unterminated hex string|Loop in Pages tree|Illegal digit in hex char in name', line): counter.append("sketchy") - print "[-] Sketchyness detected" + print("[-] Sketchyness detected") elif re.search('Unexpected end of file in flate stream|End of file inside array', line): counter.append("eof") - print "[-] EoF problem" + print("[-] EoF problem") elif re.search('Couldn\'t find trailer dictionary', line): counter.append("trailer") elif re.search('Invalid XRef entry|No valid XRef size in trailer|Invalid XRef entry|Couldn\'t read xref table', line): counter.append("xref") - print "[-] Invalid XREF" + print("[-] Invalid XREF") break - except Exception, msg: - print "[!] pdfinfo error: %s" % msg + except Exception as msg: + print("[!] pdfinfo error: %s" % msg) pass id(pdf) @@ -195,51 +195,51 @@ def id(pdf): except Exception: # I've observed some files raising errors with the 'extraData' switch command = pdfid.PDFiD2String(pdfid.PDFiD(pdf, True, False, False, True), True) - print "[!] PDFiD couldn\'t parse extra data" + print("[!] PDFiD couldn\'t parse extra data") extra = False for line in command.split('\n'): count = re.split(r'[\s]+', line) if "PDF Header" in line and not re.match('%PDF-1\.\d', count[3]): counter.append("header") - print "[-] Invalid version number : \"%s\"" % count[3] + print("[-] Invalid version number : \"%s\"" % count[3]) elif "/Page " in line: page_counter.append(count[2]) elif "/Pages " in line: page_counter.append(count[2]) elif "/JS " in line and not re.match('0', count[2]): counter.append("js") - print "[-] JavaScript count.......: %s" % count[2] + print("[-] JavaScript count.......: %s" % count[2]) if count[2] > "1": counter.append("mucho_javascript") - print "\t[*] That\'s a lot of js ..." + print("\t[*] That\'s a lot of js ...") elif "/AcroForm " in line and not re.match('0', count[2]): counter.append("acroform") - print "[-] AcroForm...............: %s" % count[2] + print("[-] AcroForm...............: %s" % count[2]) elif "/AA " in line and not re.match('0', count[2]): counter.append("aa") - print "[-] Additional Action......: %s" % count[2] + print("[-] Additional Action......: %s" % count[2]) elif "/OpenAction " in line and not re.match('0', count[2]): counter.append("oa") - print "[-] Open Action............: %s" % count[2] + print("[-] Open Action............: %s" % count[2]) elif "/Launch " in line and not re.match('0', count[2]): counter.append("launch") - print "[-] Launch Action..........: %s" % count[2] + print("[-] Launch Action..........: %s" % count[2]) elif "/EmbeddedFiles " in line and not re.match('0', count[2]): counter.append("embed") - print "[-] Embedded File..........: %s" % count[2] + print("[-] Embedded File..........: %s" % count[2]) #elif "trailer" in line and not re.match('0|1', count[2]): # print "[-] Trailer count..........: %s" % count[2] # print "\t[*] Multiple versions detected" elif "Total entropy:" in line: tentropy = count[3] - print "[-] Total Entropy..........: %7s" % count[3] + print("[-] Total Entropy..........: %7s" % count[3]) elif "Entropy inside streams:" in line: ientropy = count[4] - print "[-] Entropy inside streams : %7s" % count[4] + print("[-] Entropy inside streams : %7s" % count[4]) elif "Entropy outside streams:" in line: oentropy = count[4] - print "[-] Entropy outside streams: %7s" % count[4] + print("[-] Entropy outside streams: %7s" % count[4]) """ Entropy levels: 0 = orderly, 8 = random @@ -269,29 +269,29 @@ def id(pdf): if togo > 2: if oe_long + 2 > te_long: counter.append("entropy") - print "\t[*] Entropy of outside stream is questionable:" - print "\t[-] Outside (%s) +2 (%s) > Total (%s)" % (oe_long,oe_long +2,te_long) + print("\t[*] Entropy of outside stream is questionable:") + print("\t[-] Outside (%s) +2 (%s) > Total (%s)" % (oe_long,oe_long +2,te_long)) elif oe_long > te_long: counter.append("entropy") - print "\t[*] Entropy of outside stream is questionable:" - print "\t[-] Outside (%s) > Total (%s)" % (oe_long,te_long) + print("\t[*] Entropy of outside stream is questionable:") + print("\t[-] Outside (%s) > Total (%s)" % (oe_long,te_long)) if str(te_short) <= "2.0" or str(ie_short) <= "2.0": counter.append("entropy") - print "\t[*] LOW entropy detected:" - print "\t[-] Total (%s) or Inside (%s) <= 2.0" % (te_short,ie_short) + print("\t[*] LOW entropy detected:") + print("\t[-] Total (%s) or Inside (%s) <= 2.0" % (te_short,ie_short)) # Process the /Page(s) results here just to make sure they were both read if re.match('0', page_counter[0]) and re.match('0', page_counter[1]): counter.append("page") - print "[-] Page count suspicious:" - print "\t[*] Both /Page (%s) and /Pages (%s) = 0" % (page_counter[0],page_counter[1]) + print("[-] Page count suspicious:") + print("\t[*] Both /Page (%s) and /Pages (%s) = 0" % (page_counter[0],page_counter[1])) elif re.match('0', page_counter[0]) and not re.match('0', page_counter[1]): counter.append("page") - print "[-] Page count suspicious, no individual pages defined:" - print "\t[*] /Page = (%s) , /Pages = (%s)" % (page_counter[0],page_counter[1]) + print("[-] Page count suspicious, no individual pages defined:") + print("\t[*] /Page = (%s) , /Pages = (%s)" % (page_counter[0],page_counter[1])) elif re.match('1$', page_counter[0]): counter.append("page") - print "[-] (1) page PDF" + print("[-] (1) page PDF") yarascan(pdf) @@ -299,25 +299,25 @@ def yarascan(pdf): try: ymatch = r.match(pdf) if len(ymatch): - print "[-] YARA hit(s): %s" % ymatch + print("[-] YARA hit(s): %s" % ymatch) for rule in ymatch: meta = rule.meta - for key, value in meta.iteritems(): + for key, value in meta.items(): # If the YARA rule has a weight in it's metadata then parse that for later calculation if "weight" in key: yscore.append(value) if not ydir == False: - print "[-] Moving malicious file to:",ydir + print("[-] Moving malicious file to:",ydir) # This will move the file if _any_ YARA rule triggers...which might trick you if the # rule that triggers on it doesn't have a weight or is displayed in the output if not os.path.exists(ydir): os.makedirs(ydir) try: shutil.move(pdf, ydir) - except Exception, msg: + except Exception as msg: continue - except Exception, msg: - print msg + except Exception as msg: + print(msg) eval(counter) @@ -328,9 +328,9 @@ def eval(counter): Rating system: 0 (benign), >=2 (sketchy), >=3 (medium), >=5 (high) """ - print filler + print(filler) ytotal = sum(yscore) - print "[-] Total YARA score.......: %s" % ytotal + print("[-] Total YARA score.......: %s" % ytotal) sev = 0 # Below are various combinations used to add some intelligence and help evaluate if a file is malicious or benign. @@ -372,14 +372,14 @@ def eval(counter): if "page" in counter and "header" in counter: sev += 1 if "header" in counter and "embed" in counter: sev += 1 - print "[-] Total severity score...: %s" % sev + print("[-] Total severity score...: %s" % sev) sev = (ytotal + sev) - print "[-] Overall score..........: %s" % sev + print("[-] Overall score..........: %s" % sev) - if sev >= 5: print trailer + "\n[!] HIGH probability of being malicious" - elif sev >= 3: print trailer + "\n[!] MEDIUM probability of being malicious" - elif sev >= 2: print trailer + "\n[!] Heuristically sketchy" - elif sev >= 0: print trailer + "\n[-] Scanning didn't determine anything warranting suspicion" + if sev >= 5: print(trailer + "\n[!] HIGH probability of being malicious") + elif sev >= 3: print(trailer + "\n[!] MEDIUM probability of being malicious") + elif sev >= 2: print(trailer + "\n[!] Heuristically sketchy") + elif sev >= 0: print(trailer + "\n[-] Scanning didn't determine anything warranting suspicion") # Clear out the scores to start fresh for the next analysis del counter[:] diff --git a/Readme.md b/Readme.md index 87d299c..1265eb9 100755 --- a/Readme.md +++ b/Readme.md @@ -7,7 +7,7 @@ Requirements ------------ * pdfid * pdfinfo - * yara + * yara-python Usage ----- diff --git a/pdf_rules.yara b/pdf_rules.yara index 6eb285c..6f4f55b 100755 --- a/pdf_rules.yara +++ b/pdf_rules.yara @@ -173,7 +173,7 @@ rule multiple_filtering : PDF strings: $magic = { 25 50 44 46 } - $attrib = /\/Filter.*?(\/ASCIIHexDecode\W+|\/LZWDecode\W+|\/ASCII85Decode\W+|\/FlateDecode\W+|\/RunLengthDecode){2}/ + $attrib = /\/Filter.*(\/ASCIIHexDecode\W+|\/LZWDecode\W+|\/ASCII85Decode\W+|\/FlateDecode\W+|\/RunLengthDecode){2}/ // left out: /CCITTFaxDecode, JBIG2Decode, DCTDecode, JPXDecode, Crypt condition: @@ -394,7 +394,7 @@ rule invalid_xref_numbers : PDF strings: $magic = { 25 50 44 46 } $reg0 = /xref\r?\n?.*\r?\n?.*65535\sf/ - $reg1 = /endstream.*?\r?\n?endobj.*?\r?\n?startxref/ + $reg1 = /endstream.*\r?\n?endobj.*\r?\n?startxref/ condition: $magic at 0 and not $reg0 and not $reg1 }