瀏覽代碼

tweak logic and fix table column indexes

Joe Vest 6 年之前
父節點
當前提交
4be8c803eb
共有 3 個文件被更改,包括 145 次插入和 66 次删除
  1. 6 0
      .gitignore
  2. 28 17
      README.md
  3. 111 49
      domainhunter.py

+ 6 - 0
.gitignore

@@ -1,3 +1,9 @@
 *.html
 *.txt
 *.jpg
+
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json

+ 28 - 17
README.md

@@ -8,6 +8,12 @@ This Python based tool was written to quickly query the Expireddomains.net searc
 
 ## Changes
 
+- 4 October 2018
+   + Tweaked parsing logic
+   + Fixed parsed table column indexes
+   + Added additional TLDs to the search list when a found domain's TLD is marked available.
+        + If thisistest.com is found and thisistest.org is marked available, thisistest.org will be added to the search list
+
 - 17 September 2018
     + Fixed Symantec WebPulse Site Review parsing errors caused by service updates
 
@@ -67,37 +73,42 @@ Optional - Install additional OCR support dependencies
 
 ## Usage
 
-List DomainHunter options
-    
-    python3 domainhunter.py -h
-    usage: domainhunter.py [-h] [-q QUERY] [-c] [-r MAXRESULTS] [-s SINGLE]
-                           [-w MAXWIDTH] [-v]
+    usage: domainhunter.py [-h] [-a] [-k KEYWORD] [-c] [-f FILENAME] [--ocr]
+                        [-r MAXRESULTS] [-s SINGLE] [-t {0,1,2,3,4,5}]
+                        [-w MAXWIDTH] [-V]
 
-    Finds expired domains, domain categorization, and Archive.org history to
-    determine good candidates for C2 and phishing domains
+    Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains
 
     optional arguments:
-      -h, --help            show this help message and exit
-      -k KEYWORD, --keyword KEYWORD
+    -h, --help            show this help message and exit
+    -a, --alexa           Filter results to Alexa listings
+    -k KEYWORD, --keyword KEYWORD
                             Keyword used to refine search results
-      -c, --check           Perform domain reputation checks
-      -f FILENAME, --filename FILENAME
+    -c, --check           Perform domain reputation checks
+    -f FILENAME, --filename FILENAME
                             Specify input file of line delimited domain names to
                             check
-      --ocr                 Perform OCR on CAPTCHAs when present
-      -r MAXRESULTS, --maxresults MAXRESULTS
+    --ocr                 Perform OCR on CAPTCHAs when challenged
+    -r MAXRESULTS, --maxresults MAXRESULTS
                             Number of results to return when querying latest
                             expired/deleted domains
-      -s SINGLE, --single SINGLE
+    -s SINGLE, --single SINGLE
                             Performs detailed reputation checks against a single
                             domain name/IP.
-      -t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
+    -t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
                             Modifies request timing to avoid CAPTCHAs. Slowest(0)
                             = 90-120 seconds, Default(3) = 10-20 seconds,
                             Fastest(5) = no delay
-      -w MAXWIDTH, --maxwidth MAXWIDTH
+    -w MAXWIDTH, --maxwidth MAXWIDTH
                             Width of text table
-      -V, --version         show program's version number and exit
+    -V, --version         show program's version number and exit
+
+    Examples:
+    ./domainhunter.py -k apples -c --ocr -t5
+    ./domainhunter.py --check --ocr -t3
+    ./domainhunter.py --single mydomain.com
+    ./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
+    ./domainhunter.py --filename inputlist.txt --ocr --timing 5
 
 Use defaults to check for most recent 100 domains and check reputation
     

+ 111 - 49
domainhunter.py

@@ -293,7 +293,8 @@ if __name__ == "__main__":
 
     parser = argparse.ArgumentParser(
         description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
-        epilog = '''Examples:
+        epilog = '''
+Examples:
 ./domainhunter.py -k apples -c --ocr -t5
 ./domainhunter.py --check --ocr -t3
 ./domainhunter.py --single mydomain.com
@@ -313,8 +314,6 @@ if __name__ == "__main__":
     parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
     args = parser.parse_args()
 
- 
-
     # Load dependent modules
     try:
         import requests
@@ -495,6 +494,8 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
         #print(soup)
         try:
             table = soup.find("table")
+
+            rows = table.findAll('tr')[1:]
             for row in table.findAll('tr')[1:]:
 
                 # Alternative way to extract domain name
@@ -510,60 +511,118 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
                         c2 = cells[2].find(text=True)   # domainpop
                         c3 = cells[3].find(text=True)   # birth
                         c4 = cells[4].find(text=True)   # Archive.org entries
-                        c5 = cells[5].find(text=True)   # similarweb
-                        c6 = cells[6].find(text=True)   # similarweb country code
-                        c7 = cells[7].find(text=True)   # Dmoz.org
-                        c8 = cells[8].find(text=True)   # status com
-                        c9 = cells[9].find(text=True)   # status net
-                        c10 = cells[10].find(text=True) # status org
-                        c11 = cells[11].find(text=True) # status de
-                        c12 = cells[12].find(text=True) # tld registered
-                        c13 = cells[13].find(text=True) # Source List
-                        c14 = cells[14].find(text=True) # Domain Status
-                        c15 = ""                        # Related Domains
+                        c5 = cells[5].find(text=True)   # Alexa
+                        c6 = cells[6].find(text=True)   # Dmoz.org
+                        c7 = cells[7].find(text=True)   # status com
+                        c8 = cells[8].find(text=True)   # status net
+                        c9 = cells[9].find(text=True)   # status org
+                        c10 = cells[10].find(text=True) # status de
+                        c11 = cells[11].find(text=True) # TLDs
+                        c12 = cells[12].find(text=True) # RDT
+                        c13 = cells[13].find(text=True) # List
+                        c14 = cells[14].find(text=True) # Status
+                        c15 = ""                        # Links 
+
+                        # create available TLD list
+                        available = ''
+                        if c7 == "available":
+                            available += ".com "
+
+                        if c8 == "available":
+                            available += ".net "
+
+                        if c9 == "available":
+                            available += ".org "
+
+                        if c10 == "available":
+                            available += ".de "
+                        
+                        # Only grab status for keyword searches since it doesn't exist otherwise
+                        status = ""
+                        if keyword:
+                            status = c14
+                        
+                        # Only add Expired, not Pending, Backorder, etc
+                        if c13 == "Expired":
+                            # Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
+                            #if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
+                            #    domain_list.append([c0,c3,c4,available,status]) 
+
+                            # Add other TLDs to list if marked available
+                            if (c7 == "available") and (c0 not in maldomainsList):
+                                dom = c0.split(".")[0] + ".com"
+                                domain_list.append([dom,c3,c4,available,status]) 
+
+                            if (c8 == "available") and (c0 not in maldomainsList):
+                                dom = c0.split(".")[0] + ".net"
+                                domain_list.append([dom,c3,c4,available,status])    
+
+                            if (c9 == "available") and (c0 not in maldomainsList):
+                                dom = c0.split(".")[0] + ".org"
+                                domain_list.append([dom,c3,c4,available,status])  
+
+                            if (c10 == "available") and (c0 not in maldomainsList):
+                                dom = c0.split(".")[0] + ".de"
+                                domain_list.append([dom,c3,c4,available,status])  
 
                     # Non-keyword search table format is slightly different
                     else:
+                    
                         c0 = cells[0].find(text=True)   # domain
                         c1 = cells[1].find(text=True)   # bl
                         c2 = cells[2].find(text=True)   # domainpop
                         c3 = cells[3].find(text=True)   # birth
                         c4 = cells[4].find(text=True)   # Archive.org entries
-                        c5 = cells[5].find(text=True)   # similarweb
-                        c6 = cells[6].find(text=True)   # similarweb country code
-                        c7 = cells[7].find(text=True)   # Dmoz.org
-                        c8 = cells[8].find(text=True)   # status com
-                        c9 = cells[9].find(text=True)   # status net
-                        c10 = cells[10].find(text=True) # status org
-                        c11 = cells[11].find(text=True) # status de
-                        c12 = cells[12].find(text=True) # tld registered
-                        c13 = cells[13].find(text=True) # changes
-                        c14 = cells[14].find(text=True) # whois
-
-                    available = ''
-                    if c8 == "available":
-                        available += ".com "
-
-                    if c9 == "available":
-                        available += ".net "
-
-                    if c10 == "available":
-                        available += ".org "
-
-                    if c11 == "available":
-                        available += ".de "
-
-                    # Only grab status for keyword searches since it doesn't exist otherwise
-                    status = ""
-                    if keyword:
-                        status = c14
-
-                    # Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
-                    if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
-                        domain_list.append([c0,c3,c4,available,status])               
-
+                        c5 = cells[5].find(text=True)   # Alexa
+                        c6 = cells[6].find(text=True)   # Dmoz.org
+                        c7 = cells[7].find(text=True)   # status com
+                        c8 = cells[8].find(text=True)   # status net
+                        c9 = cells[9].find(text=True)   # status org
+                        c10 = cells[10].find(text=True) # status de
+                        c11 = cells[11].find(text=True) # TLDs
+                        c12 = cells[12].find(text=True) # RDT
+                        c13 = cells[13].find(text=True) # End Date
+                        c14 = cells[14].find(text=True) # Links
+                        
+                        # create available TLD list
+                        available = ''
+                        if c7 == "available":
+                            available += ".com "
+
+                        if c8 == "available":
+                            available += ".net "
+
+                        if c9 == "available":
+                            available += ".org "
+
+                        if c10 == "available":
+                            available += ".de "
+
+                        status = ""
+
+                        # Add other TLDs to list if marked available
+                        if (c7 == "available") and (c0 not in maldomainsList):
+                            dom = c0.split(".")[0] + ".com"
+                            domain_list.append([dom,c3,c4,available,status]) 
+
+                        if (c8 == "available") and (c0 not in maldomainsList):
+                            dom = c0.split(".")[0] + ".net"
+                            domain_list.append([dom,c3,c4,available,status])    
+                        
+                        if (c9 == "available") and (c0 not in maldomainsList):
+                            dom = c0.split(".")[0] + ".org"
+                            domain_list.append([dom,c3,c4,available,status])  
+                    
+                        if (c10 == "available") and (c0 not in maldomainsList):
+                            dom = c0.split(".")[0] + ".de"
+                            domain_list.append([dom,c3,c4,available,status])  
+
+                        # Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
+                        #if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
+                        #    domain_list.append([c0,c3,c4,available,status]) 
+                        
         except Exception as e: 
-            #print(e)
+            print("[!] Error: ", e)
             pass
 
         # Add additional sleep on requests to ExpiredDomains.net to avoid errors
@@ -577,7 +636,10 @@ If you plan to use this content for illegal purpose, don't.  Have a nice day :)'
         if check:
             print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))
         
-        for domain_entry in domain_list:
+        domain_list_unique = []
+        [domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
+
+        for domain_entry in domain_list_unique:
             domain = domain_entry[0]
             birthdate = domain_entry[1]
             archiveentries = domain_entry[2]