hiddenillusion · KoikyLansoy · Sep 21, 2025 · Sep 21, 2025 · Sep 21, 2025 · Sep 21, 2025
diff --git a/AnalyzePDF.py b/AnalyzePDF.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Analyzes PDF files by looking at their characteristics in order to add some intelligence into the determination of them being malicious or benign.
@@ -28,7 +28,7 @@
 # Version 0.2 
 # Date: 10-11-2012
 # Requirements:
-#	- Python 2.x
+#	- Python 3.x
 #	- YARA (http://plusvic.github.io/yara/)
 #	- pdfid (http://blog.didierstevens.com/programs/pdf-tools/)
 # Optional:	
@@ -61,12 +61,12 @@
 try:
     import pdfid 
 except ImportError:
-    print "[!] PDFiD not installed"
+    print("[!] PDFiD not installed")
     sys.exit()
 try:
     import yara
 except ImportError:
-    print "[!] Yara not installed"
+    print("[!] Yara not installed")
     sys.exit()	
 
 # Initialize the list(s) where PDF attribs will be added to
@@ -88,7 +88,7 @@
 
 # Verify supplied path exists or die
 if not os.path.exists(args['Path']):
-    print "[!] The supplied path does not exist"
+    print("[!] The supplied path does not exist")
     sys.exit()
 
 # Configure YARA rules
@@ -98,15 +98,15 @@
     rules = '/usr/local/etc/capabilities.yara' # REMnux location
 
 if not os.path.exists(rules):
-    print "[!] Correct path to YARA rules?"
+    print("[!] Correct path to YARA rules?")
     sys.exit()
 else:
     try:	
         r = yara.compile(rules)
         if args['move']:
             ydir = args['move']
-    except Exception, msg:
-        print "[!] YARA compile error: %s" % msg
+    except Exception as msg:
+        print("[!] YARA compile error: %s" % msg)
         sys.exit()
 
 def main():
@@ -129,8 +129,8 @@ def sha256(pdf):
         data = f.read()
         sha256 =  hashlib.sha256(data).hexdigest()
         f.close()
-    except Exception, msg:
-        print msg
+    except Exception as msg:
+        print(msg)
 
     return sha256
 
@@ -144,11 +144,11 @@ def fileID(pdf):
     """
     f = open(pdf,'rb')
     s = f.read(1024)
-    if '\x25\x50\x44\x46' in s:
-        print "\n" + trailer
-        print "[+] Analyzing: %s" % pdf
-        print filler
-        print "[-] Sha256: %s" % sha256(pdf)
+    if b'\x25\x50\x44\x46' in s:
+        print("\n" + trailer)
+        print("[+] Analyzing: %s" % pdf)
+        print(filler)
+        print("[-] Sha256: %s" % sha256(pdf))
         info(pdf)
     elif os.path.isdir(pdf): pwalk(pdf)
     f.close()
@@ -171,18 +171,18 @@ def info(pdf):
         for line in p.stderr:
             if re.search('Unterminated hex string|Loop in Pages tree|Illegal digit in hex char in name', line):
                 counter.append("sketchy")
-                print "[-] Sketchyness detected" 
+                print("[-] Sketchyness detected") 
             elif re.search('Unexpected end of file in flate stream|End of file inside array', line):
                 counter.append("eof")
-                print "[-] EoF problem" 
+                print("[-] EoF problem") 
             elif re.search('Couldn\'t find trailer dictionary', line):
                 counter.append("trailer")			
             elif re.search('Invalid XRef entry|No valid XRef size in trailer|Invalid XRef entry|Couldn\'t read xref table', line):
                 counter.append("xref")
-                print "[-] Invalid XREF"
+                print("[-] Invalid XREF")
                 break
-    except Exception, msg:
-        print "[!] pdfinfo error: %s" % msg
+    except Exception as msg:
+        print("[!] pdfinfo error: %s" % msg)
         pass
 
     id(pdf)
@@ -195,51 +195,51 @@ def id(pdf):
     except Exception:
         # I've observed some files raising errors with the 'extraData' switch
         command = pdfid.PDFiD2String(pdfid.PDFiD(pdf, True, False, False, True), True)
-        print "[!] PDFiD couldn\'t parse extra data"
+        print("[!] PDFiD couldn\'t parse extra data")
         extra = False
 
     for line in command.split('\n'):
         count = re.split(r'[\s]+', line)
         if "PDF Header" in line and not re.match('%PDF-1\.\d', count[3]):
             counter.append("header")
-            print "[-] Invalid version number : \"%s\"" % count[3]
+            print("[-] Invalid version number : \"%s\"" % count[3])
         elif "/Page " in line:
             page_counter.append(count[2])
         elif "/Pages " in line:
             page_counter.append(count[2])
         elif "/JS " in line and not re.match('0', count[2]):
             counter.append("js")
-            print "[-] JavaScript count.......: %s" % count[2]
+            print("[-] JavaScript count.......: %s" % count[2])
             if count[2] > "1":
                 counter.append("mucho_javascript")
-                print "\t[*] That\'s a lot of js ..."
+                print("\t[*] That\'s a lot of js ...")
         elif "/AcroForm " in line and not re.match('0', count[2]):
             counter.append("acroform")
-            print "[-] AcroForm...............: %s" % count[2]
+            print("[-] AcroForm...............: %s" % count[2])
         elif "/AA " in line and not re.match('0', count[2]):
             counter.append("aa")
-            print "[-] Additional Action......: %s" % count[2]
+            print("[-] Additional Action......: %s" % count[2])
         elif "/OpenAction " in line and not re.match('0', count[2]):
             counter.append("oa")
-            print "[-] Open Action............: %s" % count[2]
+            print("[-] Open Action............: %s" % count[2])
         elif "/Launch " in line and not re.match('0', count[2]):
             counter.append("launch")
-            print "[-] Launch Action..........: %s" % count[2]
+            print("[-] Launch Action..........: %s" % count[2])
         elif "/EmbeddedFiles " in line and not re.match('0', count[2]):
             counter.append("embed")
-            print "[-] Embedded File..........: %s" % count[2]
+            print("[-] Embedded File..........: %s" % count[2])
         #elif "trailer" in line and not re.match('0|1', count[2]):
         #    print "[-] Trailer count..........: %s" % count[2]
         #    print "\t[*] Multiple versions detected"
         elif "Total entropy:" in line:
             tentropy = count[3]		
-            print "[-] Total Entropy..........: %7s" % count[3]
+            print("[-] Total Entropy..........: %7s" % count[3])
         elif "Entropy inside streams:" in line:
             ientropy = count[4]
-            print "[-] Entropy inside streams : %7s" % count[4]
+            print("[-] Entropy inside streams : %7s" % count[4])
         elif "Entropy outside streams:" in line:
             oentropy = count[4]	
-            print "[-] Entropy outside streams: %7s" % count[4]
+            print("[-] Entropy outside streams: %7s" % count[4])
     """
 	Entropy levels:
 	0 = orderly, 8 = random
@@ -269,55 +269,55 @@ def id(pdf):
         if togo > 2:
             if oe_long + 2 > te_long:
                 counter.append("entropy")		
-                print "\t[*] Entropy of outside stream is questionable:"
-                print "\t[-] Outside (%s) +2 (%s) > Total (%s)" % (oe_long,oe_long +2,te_long)
+                print("\t[*] Entropy of outside stream is questionable:")
+                print("\t[-] Outside (%s) +2 (%s) > Total (%s)" % (oe_long,oe_long +2,te_long))
         elif oe_long > te_long:
             counter.append("entropy")		
-            print "\t[*] Entropy of outside stream is questionable:"
-            print "\t[-] Outside (%s) > Total (%s)" % (oe_long,te_long)
+            print("\t[*] Entropy of outside stream is questionable:")
+            print("\t[-] Outside (%s) > Total (%s)" % (oe_long,te_long))
         if str(te_short) <= "2.0" or str(ie_short) <= "2.0":
             counter.append("entropy")		
-            print "\t[*] LOW entropy detected:"
-            print "\t[-] Total (%s) or Inside (%s) <= 2.0" % (te_short,ie_short)
+            print("\t[*] LOW entropy detected:")
+            print("\t[-] Total (%s) or Inside (%s) <= 2.0" % (te_short,ie_short))
 
     # Process the /Page(s) results here just to make sure they were both read
     if re.match('0', page_counter[0]) and re.match('0', page_counter[1]):
         counter.append("page")
-        print "[-] Page count suspicious:"  
-        print "\t[*] Both /Page (%s) and /Pages (%s) = 0" % (page_counter[0],page_counter[1])
+        print("[-] Page count suspicious:")
+        print("\t[*] Both /Page (%s) and /Pages (%s) = 0" % (page_counter[0],page_counter[1]))
     elif re.match('0', page_counter[0]) and not re.match('0', page_counter[1]):
         counter.append("page")
-        print "[-] Page count suspicious, no individual pages defined:"  
-        print "\t[*] /Page = (%s) , /Pages = (%s)" % (page_counter[0],page_counter[1])
+        print("[-] Page count suspicious, no individual pages defined:")
+        print("\t[*] /Page = (%s) , /Pages = (%s)" % (page_counter[0],page_counter[1]))
     elif re.match('1$', page_counter[0]):
         counter.append("page")
-        print "[-] (1) page PDF"  
+        print("[-] (1) page PDF")  
 
     yarascan(pdf)
 
 def yarascan(pdf):
     try:
         ymatch = r.match(pdf)
         if len(ymatch):
-            print "[-] YARA hit(s): %s" % ymatch
+            print("[-] YARA hit(s): %s" % ymatch)
             for rule in ymatch:
                 meta = rule.meta
-                for key, value in meta.iteritems():
+                for key, value in meta.items():
                     # If the YARA rule has a weight in it's metadata then parse that for later calculation
                     if "weight" in key:
                       yscore.append(value)
                 if not ydir == False:
-                    print "[-] Moving malicious file to:",ydir
+                    print("[-] Moving malicious file to:",ydir)
                     # This will move the file if _any_ YARA rule triggers...which might trick you if the
                     # rule that triggers on it doesn't have a weight or is displayed in the output
                     if not os.path.exists(ydir):
                         os.makedirs(ydir)
                     try:
                         shutil.move(pdf, ydir)
-                    except Exception, msg:
+                    except Exception as msg:
                         continue
-    except Exception, msg:
-        print msg
+    except Exception as msg:
+        print(msg)
 
     eval(counter)
 
@@ -328,9 +328,9 @@ def eval(counter):
 
     Rating system: 0 (benign), >=2 (sketchy), >=3 (medium), >=5 (high)
     """
-    print filler	
+    print(filler)	
     ytotal = sum(yscore)
-    print "[-] Total YARA score.......: %s" % ytotal
+    print("[-] Total YARA score.......: %s" % ytotal)
     sev = 0
 
     # Below are various combinations used to add some intelligence and help evaluate if a file is malicious or benign.  
@@ -372,14 +372,14 @@ def eval(counter):
     if "page" in counter and "header" in counter: sev += 1	
     if "header" in counter and "embed" in counter: sev += 1
 
-    print "[-] Total severity score...: %s" % sev
+    print("[-] Total severity score...: %s" % sev)
     sev = (ytotal + sev)
-    print "[-] Overall score..........: %s" % sev
+    print("[-] Overall score..........: %s" % sev)
 
-    if sev >= 5: print trailer + "\n[!] HIGH probability of being malicious"
-    elif sev >= 3: print trailer + "\n[!] MEDIUM probability of being malicious"
-    elif sev >= 2: print trailer + "\n[!] Heuristically sketchy"
-    elif sev >= 0: print trailer + "\n[-] Scanning didn't determine anything warranting suspicion"
+    if sev >= 5: print(trailer + "\n[!] HIGH probability of being malicious")
+    elif sev >= 3: print(trailer + "\n[!] MEDIUM probability of being malicious")
+    elif sev >= 2: print(trailer + "\n[!] Heuristically sketchy")
+    elif sev >= 0: print(trailer + "\n[-] Scanning didn't determine anything warranting suspicion")
 
     # Clear out the scores to start fresh for the next analysis
     del counter[:]

diff --git a/Readme.md b/Readme.md
@@ -7,7 +7,7 @@ Requirements
 ------------
 	* pdfid
 	* pdfinfo
-	* yara
+	* yara-python
 
 Usage
 -----

diff --git a/pdf_rules.yara b/pdf_rules.yara
@@ -173,7 +173,7 @@ rule multiple_filtering : PDF
 
         strings:
                 $magic = { 25 50 44 46 }
-                $attrib = /\/Filter.*?(\/ASCIIHexDecode\W+|\/LZWDecode\W+|\/ASCII85Decode\W+|\/FlateDecode\W+|\/RunLengthDecode){2}/           
+                $attrib = /\/Filter.*(\/ASCIIHexDecode\W+|\/LZWDecode\W+|\/ASCII85Decode\W+|\/FlateDecode\W+|\/RunLengthDecode){2}/
 				// left out: /CCITTFaxDecode, JBIG2Decode, DCTDecode, JPXDecode, Crypt
 
         condition: 
@@ -394,7 +394,7 @@ rule invalid_xref_numbers : PDF
         strings:
                 $magic = { 25 50 44 46 }
                 $reg0 = /xref\r?\n?.*\r?\n?.*65535\sf/
-                $reg1 = /endstream.*?\r?\n?endobj.*?\r?\n?startxref/
+                $reg1 = /endstream.*\r?\n?endobj.*\r?\n?startxref/
         condition:
                 $magic at 0 and not $reg0 and not $reg1
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,7 +7,7 @@ Requirements @@
     ------------
     	* pdfid
     	* pdfinfo
-    	* yara
+    	* yara-python
     Usage
     -----
@@ Expand Down @@