Browse Source

Fix categorized domain filtering logic

Andrew Chiles 6 years ago
parent
commit
131eec6396
3 changed files with 38 additions and 75 deletions
  1. 2 1
      .gitignore
  2. 3 2
      README.md
  3. 33 72
      domainhunter.py

+ 2 - 1
.gitignore

@@ -1,9 +1,10 @@
 *.html
 *.html
 *.txt
 *.txt
 *.jpg
 *.jpg
+Pipfile*
 
 
 .vscode/*
 .vscode/*
 !.vscode/settings.json
 !.vscode/settings.json
 !.vscode/tasks.json
 !.vscode/tasks.json
 !.vscode/launch.json
 !.vscode/launch.json
-!.vscode/extensions.json
+!.vscode/extensions.json

+ 3 - 2
README.md

@@ -8,11 +8,12 @@ This Python based tool was written to quickly query the Expireddomains.net searc
 
 
 ## Changes
 ## Changes
 
 
+- 5 October 2018
+   + Fixed logic for filtering domains with desirable categorizations. Previously, some error conditions weren't filtered and would result in domains without a valid categorization making it into the final list.
+
 - 4 October 2018
 - 4 October 2018
    + Tweaked parsing logic
    + Tweaked parsing logic
    + Fixed changed parsed column indexes
    + Fixed changed parsed column indexes
-   + Added additional TLDs to found TLD if the TLD is marked available.
-        + If thisistest.com is found and thisistest.org is marked available, thisistest.org will be added to the search list
 
 
 - 17 September 2018
 - 17 September 2018
     + Fixed Symantec WebPulse Site Review parsing errors caused by service updates
     + Fixed Symantec WebPulse Site Review parsing errors caused by service updates

+ 33 - 72
domainhunter.py

@@ -16,7 +16,7 @@ import json
 import base64
 import base64
 import os
 import os
 
 
-__version__ = "20180917"
+__version__ = "20181005"
 
 
 ## Functions
 ## Functions
 
 
@@ -86,7 +86,7 @@ def checkBluecoat(domain):
 
 
     except Exception as e:
     except Exception as e:
         print('[-] Error retrieving Bluecoat reputation! {0}'.format(e))
         print('[-] Error retrieving Bluecoat reputation! {0}'.format(e))
-        return "-"
+        return "error"
 
 
 def checkIBMXForce(domain):
 def checkIBMXForce(domain):
     try: 
     try: 
@@ -122,9 +122,9 @@ def checkIBMXForce(domain):
 
 
         return a
         return a
 
 
-    except:
-        print('[-] Error retrieving IBM x-Force reputation!')
-        return "-"
+    except Exception as e:
+        print('[-] Error retrieving IBM-Xforce reputation! {0}'.format(e))
+        return "error"
 
 
 def checkTalos(domain):
 def checkTalos(domain):
     url = 'https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc'.format(domain)
     url = 'https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc'.format(domain)
@@ -150,9 +150,9 @@ def checkTalos(domain):
        
        
         return a
         return a
 
 
-    except:
-        print('[-] Error retrieving Talos reputation!')
-        return "-"
+    except Exception as e:
+        print('[-] Error retrieving Talos reputation! {0}'.format(e))
+        return "error"
 
 
 def checkMXToolbox(domain):
 def checkMXToolbox(domain):
     url = 'https://mxtoolbox.com/Public/Tools/BrandReputation.aspx'
     url = 'https://mxtoolbox.com/Public/Tools/BrandReputation.aspx'
@@ -211,7 +211,7 @@ def checkMXToolbox(domain):
 
 
     except Exception as e:
     except Exception as e:
         print('[-] Error retrieving Google SafeBrowsing and PhishTank reputation!')
         print('[-] Error retrieving Google SafeBrowsing and PhishTank reputation!')
-        return "-"
+        return "error"
 
 
 def downloadMalwareDomains(malwaredomainsURL):
 def downloadMalwareDomains(malwaredomainsURL):
     url = malwaredomainsURL
     url = malwaredomainsURL
@@ -403,7 +403,7 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
         checkDomain(single)
         checkDomain(single)
         exit(0)
         exit(0)
 
 
-    # Perform detailed domain reputation checks against input file, print table, and quit
+    # Perform detailed domain reputation checks against input file, print table, and quit. This does not generate an HTML report
     if filename:
     if filename:
         # Initialize our list with an empty row for the header
         # Initialize our list with an empty row for the header
         data = []
         data = []
@@ -451,10 +451,10 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
         print('[*] Fetching expired or deleted domains containing "{}"'.format(keyword))
         print('[*] Fetching expired or deleted domains containing "{}"'.format(keyword))
         for i in range (0,maxresults,25):
         for i in range (0,maxresults,25):
             if i == 0:
             if i == 0:
-                urls.append("{}/?q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL,keyword,alexa))
+                urls.append("{}/?q={}&fwhois=22&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa={}".format(expireddomainsqueryURL,keyword,alexa))
                 headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(keyword)
                 headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(keyword)
             else:
             else:
-                urls.append("{}/?start={}&q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL,i,keyword,alexa))
+                urls.append("{}/?start={}&q={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&fwhois=22&falexa={}".format(expireddomainsqueryURL,i,keyword,alexa))
                 headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),keyword)
                 headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),keyword)
     
     
     # If no keyword provided, generate list of recently expired domains URLS (batches of 25 results).
     # If no keyword provided, generate list of recently expired domains URLS (batches of 25 results).
@@ -545,25 +545,8 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
                         # Only add Expired, not Pending, Backorder, etc
                         # Only add Expired, not Pending, Backorder, etc
                         if c13 == "Expired":
                         if c13 == "Expired":
                             # Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
                             # Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
-                            #if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
-                            #    domain_list.append([c0,c3,c4,available,status]) 
-
-                            # Add other TLDs to list if marked available
-                            if (c7 == "available") and (c0 not in maldomainsList):
-                                dom = c0.split(".")[0] + ".com"
-                                domain_list.append([dom,c3,c4,available,status]) 
-
-                            if (c8 == "available") and (c0 not in maldomainsList):
-                                dom = c0.split(".")[0] + ".net"
-                                domain_list.append([dom,c3,c4,available,status])    
-
-                            if (c9 == "available") and (c0 not in maldomainsList):
-                                dom = c0.split(".")[0] + ".org"
-                                domain_list.append([dom,c3,c4,available,status])  
-
-                            if (c10 == "available") and (c0 not in maldomainsList):
-                                dom = c0.split(".")[0] + ".de"
-                                domain_list.append([dom,c3,c4,available,status])  
+                            if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
+                                domain_list.append([c0,c3,c4,available,status]) 
 
 
                     # Non-keyword search table format is slightly different
                     # Non-keyword search table format is slightly different
                     else:
                     else:
@@ -600,26 +583,9 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
 
 
                         status = ""
                         status = ""
 
 
-                        # Add other TLDs to list if marked available
-                        if (c7 == "available") and (c0 not in maldomainsList):
-                            dom = c0.split(".")[0] + ".com"
-                            domain_list.append([dom,c3,c4,available,status]) 
-
-                        if (c8 == "available") and (c0 not in maldomainsList):
-                            dom = c0.split(".")[0] + ".net"
-                            domain_list.append([dom,c3,c4,available,status])    
-                        
-                        if (c9 == "available") and (c0 not in maldomainsList):
-                            dom = c0.split(".")[0] + ".org"
-                            domain_list.append([dom,c3,c4,available,status])  
-                    
-                        if (c10 == "available") and (c0 not in maldomainsList):
-                            dom = c0.split(".")[0] + ".de"
-                            domain_list.append([dom,c3,c4,available,status])  
-
                         # Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
                         # Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
-                        #if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
-                        #    domain_list.append([c0,c3,c4,available,status]) 
+                        if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
+                            domain_list.append([c0,c3,c4,available,status]) 
                         
                         
         except Exception as e: 
         except Exception as e: 
             print("[!] Error: ", e)
             print("[!] Error: ", e)
@@ -633,13 +599,12 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
         print("[-] No domain results found or none are currently available for purchase!")
         print("[-] No domain results found or none are currently available for purchase!")
         exit(0)
         exit(0)
     else:
     else:
-        if check:
-            print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))
-        
         domain_list_unique = []
         domain_list_unique = []
         [domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
         [domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
 
 
-        index = 1
+        # Print number of domains to perform reputation checks against
+        if check:
+            print("\n[*] Performing reputation checks for {} domains".format(len(domain_list_unique)))
 
 
         for domain_entry in domain_list_unique:
         for domain_entry in domain_list_unique:
             domain = domain_entry[0]
             domain = domain_entry[0]
@@ -647,42 +612,38 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
             archiveentries = domain_entry[2]
             archiveentries = domain_entry[2]
             availabletlds = domain_entry[3]
             availabletlds = domain_entry[3]
             status = domain_entry[4]
             status = domain_entry[4]
-            bluecoat = ''
-            ibmxforce = ''
-            ciscotalos = ''
+            bluecoat = '-'
+            ibmxforce = '-'
+            ciscotalos = '-'
 
 
             # Perform domain reputation checks
             # Perform domain reputation checks
             if check:
             if check:
-                print("[*] Domain {} of {}".format(str(index),str(len(domain_list))))
+                
                 bluecoat = checkBluecoat(domain)
                 bluecoat = checkBluecoat(domain)
                 print("[+] {}: {}".format(domain, bluecoat))
                 print("[+] {}: {}".format(domain, bluecoat))
                 ibmxforce = checkIBMXForce(domain)
                 ibmxforce = checkIBMXForce(domain)
                 print("[+] {}: {}".format(domain, ibmxforce))
                 print("[+] {}: {}".format(domain, ibmxforce))
                 ciscotalos = checkTalos(domain)
                 ciscotalos = checkTalos(domain)
                 print("[+] {}: {}".format(domain, ciscotalos))
                 print("[+] {}: {}".format(domain, ciscotalos))
+                print("")
                 # Sleep to avoid captchas
                 # Sleep to avoid captchas
                 doSleep(timing)
                 doSleep(timing)
 
 
-            # Mark reputation checks as skipped if -c flag not present
-            else:
-                bluecoat = '-'
-                ibmxforce = '-'
-                ciscotalos = '-'
-
             # Append entry to new list with reputation if at least one service reports reputation
             # Append entry to new list with reputation if at least one service reports reputation
-            if not ((bluecoat in ('Uncategorized','badurl','Suspicious','Malicious Sources/Malnets','captcha','Phishing')) and ibmxforce == "Not found." and ciscotalos == "Uncategorized"):
+            if not ((bluecoat in ('Uncategorized','badurl','Suspicious','Malicious Sources/Malnets','captcha','Phishing','Placeholders','Spam','error')) \
+                and (ibmxforce in ('Not found.','error')) and (ciscotalos in ('Uncategorized','error'))):
+                
                 data.append([domain,birthdate,archiveentries,availabletlds,status,bluecoat,ibmxforce,ciscotalos])
                 data.append([domain,birthdate,archiveentries,availabletlds,status,bluecoat,ibmxforce,ciscotalos])
 
 
-            index += 1
-
     # Sort domain list by column 2 (Birth Year)
     # Sort domain list by column 2 (Birth Year)
     sortedDomains = sorted(data, key=lambda x: x[1], reverse=True) 
     sortedDomains = sorted(data, key=lambda x: x[1], reverse=True) 
 
 
-    if len(sortedDomains) == 0:
-        print("\n[-] No domains discovered with a desireable categorization!")
-        exit(0)
-    else:
-        print("\n[*] {} of {} domains discovered with a potentially desireable categorization!".format(len(sortedDomains),len(domain_list)))
+    if check:
+        if len(sortedDomains) == 0:
+            print("[-] No domains discovered with a desireable categorization!")
+            exit(0)
+        else:
+            print("[*] {} of {} domains discovered with a potentially desireable categorization!".format(len(sortedDomains),len(domain_list)))
 
 
     # Build HTML Table
     # Build HTML Table
     html = ''
     html = ''