#!/usr/bin/env python

## Title:       domainhunter.py
## Author:      Joe Vest and Andrew Chiles
## Description: Checks expired domains, Bluecoat categorization, and Archive.org history to determine
##              good candidates for phishing and C2 domain names

# To-do:
# - Add reputation categorizations to identify desirable vs. undesirable domains
# - Code cleanup/optimization
# - Read in a list of desired domain names

import time
import random
import argparse
import json

## Functions
def checkBluecoat(domain):
    """Query Symantec/Bluecoat Site Review for a domain's categorization."""
    try:
        url = 'https://sitereview.bluecoat.com/rest/categorization'
        postData = {'url': domain}  # HTTP POST parameters
        headers = {'User-Agent': useragent,
                   'X-Requested-With': 'XMLHttpRequest',
                   'Referer': 'https://sitereview.bluecoat.com/sitereview.jsp'}

        print('[*] BlueCoat Check: {}'.format(domain))
        response = s.post(url, headers=headers, data=postData, verify=False)
        responseJson = json.loads(response.text)

        if 'errorType' in responseJson:
            a = responseJson['errorType']
        else:
            # The categorization comes back as an HTML fragment; extract the link text
            soupA = BeautifulSoup(responseJson['categorization'], 'lxml')
            a = soupA.find('a').text
        return a

    except Exception:
        print('[-] Error retrieving Bluecoat reputation!')
        return '-'
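
# Example (illustrative only; the category string is an assumption, not a guaranteed value):
#   checkBluecoat('example.com') -> a category name such as 'Search Engines/Portals',
#   the service's errorType string, or '-' if the lookup fails.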

def checkIBMxForce(domain):
    """Query the IBM X-Force Exchange API for a domain's URL categorization."""
    try:
        url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
        headers = {'User-Agent': useragent,
                   'Accept': 'application/json, text/plain, */*',
                   'x-ui': 'XFE',
                   'Origin': url,
                   'Referer': url}

        print('[*] IBM xForce Check: {}'.format(domain))
        url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
        response = s.get(url, headers=headers, verify=False)
        responseJson = json.loads(response.text)

        if 'error' in responseJson:
            a = responseJson['error']
        else:
            a = responseJson['result']['cats']
        return a

    except Exception:
        print('[-] Error retrieving IBM X-Force reputation!')
        return '-'
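
# Example (illustrative only): checkIBMxForce('example.com') -> the 'cats' value from
# the API's JSON result, the API's error string, or '-' if the request itself fails.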

def downloadMalwareDomains():
    """Download the malwaredomains.com blocklist (plain text, one domain per line)."""
    url = malwaredomains
    response = s.get(url, headers=headers, verify=False)
    responseText = response.text
    if response.status_code == 200:
        return responseText
    else:
        print('Error reaching: {} Status: {}'.format(url, response.status_code))

## MAIN
if __name__ == "__main__":
    try:
        import requests
        from bs4 import BeautifulSoup
        from texttable import Texttable

    except Exception as e:
        print('Expired Domains Reputation Check')
        print('[-] Missing dependencies: {}'.format(str(e)))
        print('[*] Install required dependencies by running `pip install -r requirements.txt`')
        quit(0)

    parser = argparse.ArgumentParser(description='Checks expired domains, bluecoat categorization, and Archive.org history to determine good candidates for C2 and phishing domains')
    parser.add_argument('-q', '--query', help='Optional keyword used to refine search results', required=False, type=str)
    parser.add_argument('-c', '--check', help='Perform slow reputation checks', required=False, default=False, action='store_true')
    parser.add_argument('-r', '--maxresults', help='Number of results to return when querying latest expired/deleted domains (min. 100)', required=False, type=int, default=100)
    parser.add_argument('-w', '--maxwidth', help='Width of text table', required=False, type=int, default=400)
    args = parser.parse_args()
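
    # Example invocations (illustrative; 'mycompany' is a placeholder keyword):
    #   python domainhunter.py --query mycompany --check
    #   python domainhunter.py --maxresults 200 --maxwidth 300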

    ## Variables
    query = False
    if args.query:
        query = args.query

    check = args.check

    maxresults = args.maxresults
    if maxresults < 100:
        maxresults = 100

    maxwidth = args.maxwidth

    t = Texttable(max_width=maxwidth)

    malwaredomains = 'http://mirror1.malwaredomains.com/files/justdomains'
    expireddomainsqueryurl = 'https://www.expireddomains.net/domain-name-search'

    timestamp = time.strftime("%Y%m%d_%H%M%S")

    useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
    headers = {'User-Agent': useragent}

    requests.packages.urllib3.disable_warnings()

    # HTTP Session container, used to manage cookies, session tokens and other session information
    s = requests.Session()

    data = []

    title = '''
 ____   ___  __  __    _    ___ _   _   _   _ _   _ _   _ _____ _____ ____
|  _ \ / _ \|  \/  |  / \  |_ _| \ | | | | | | | | | \ | |_   _| ____|  _ \\
| | | | | | | |\/| | / _ \  | ||  \| | | |_| | | | |  \| | | | |  _| | |_) |
| |_| | |_| | |  | |/ ___ \ | || |\  | |  _  | |_| | |\  | | | | |___|  _ <
|____/ \___/|_|  |_/_/   \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\\ '''

    print(title)
    print("")
    print("Expired Domains Reputation Checker")
    print("")
    print("DISCLAIMER:")
    print("This is for educational purposes only!")
    disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
If you plan to use this content for illegal purposes, don't. Have a nice day :)'''
    print(disclaimer)
    print("")

    print("********************************************")
    print("Start Time: {}".format(timestamp))
    print("TextTable Column Width: {}".format(str(maxwidth)))
    print("Checking Reputation: {}".format(str(check)))
    print("Number Domains Checked: {}".format(maxresults))
    print("********************************************")
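
    # Rough estimate: with reputation checks enabled, each domain costs roughly 20 s
    # (two HTTP lookups plus the 10-20 s anti-captcha sleep below); without them,
    # roughly 0.15 s per domain.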
    runtime = 0
    if check:
        runtime = (maxresults * 20) / 60
    else:
        runtime = maxresults * .15 / 60

    print("Estimated Max Run Time: {} minutes".format(int(runtime)))
    print("")

    print('[*] Downloading malware domain list from {}'.format(malwaredomains))
    maldomains = downloadMalwareDomains()
    maldomains_list = maldomains.split("\n")

    # Use the keyword string to narrow the domain search if provided
    # Need to modify this to pull more than the first 25 results
    if query:
        print('[*] Fetching expired or deleted domains containing "{}"...'.format(query))
        for i in range(0, maxresults, 25):
            if i == 0:
                url = "{}/?q={}".format(expireddomainsqueryurl, query)
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?q={}&searchinit=1'.format(query)
            else:
                url = "{}/?start={}&q={}".format(expireddomainsqueryurl, i, query)
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i - 25), query)

            print("[*] {}".format(url))

            # Annoyingly, when querying specific keywords the expireddomains.net site requires
            # additional cookies which are set in JavaScript and not recognized by Requests,
            # so we add them here manually
            jar = requests.cookies.RequestsCookieJar()
            jar.set('_pk_id.10.dd0a', '*', domain='expireddomains.net', path='/')
            jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')

            domains = s.get(url, headers=headers, verify=False, cookies=jar).text

            # Turn the HTML into a Beautiful Soup object
            soup = BeautifulSoup(domains, 'lxml')
            table = soup.find("table")

            for row in table.findAll('tr')[1:]:
                # Alternative way to extract the domain name:
                # domain = row.find('td').find('a').text
                cells = row.findAll("td")
                if len(cells) >= 1:
                    c0 = row.find('td').find('a').text   # domain
                    c1 = cells[1].find(text=True)        # bl
                    c2 = cells[2].find(text=True)        # domainpop
                    c3 = cells[3].find(text=True)        # birth
                    c4 = cells[4].find(text=True)        # entries
                    c5 = cells[5].find(text=True)        # similarweb
                    c6 = cells[6].find(text=True)        # similarweb country code
                    c7 = cells[7].find(text=True)        # moz
                    c8 = cells[8].find(text=True)        # status com
                    c9 = cells[9].find(text=True)        # status net
                    c10 = cells[10].find(text=True)      # status org
                    c11 = cells[11].find(text=True)      # status de
                    c12 = cells[12].find(text=True)      # tld registered
                    c13 = cells[13].find(text=True)      # monthly searches
                    c14 = cells[14].find(text=True)      # adwords competition
                    c15 = cells[15].find(text=True)      # list
                    c16 = cells[16].find(text=True)      # status
                    c17 = cells[17].find(text=True)      # related links

                    available = ''
                    if c8 == "available":
                        available += ".com "
                    if c9 == "available":
                        available += ".net "
                    if c10 == "available":
                        available += ".org "
                    if c11 == "available":
                        available += ".de "

                    # Skip additional reputation checks if this domain is already categorized as malicious
                    if c0 in maldomains_list:
                        print("[-] Skipping {} - Identified as known malware domain".format(c0))
                    else:
                        bluecoat = ''
                        ibmxforce = ''
                        if c3 == '-':
                            bluecoat = 'ignored'
                            ibmxforce = 'ignored'
                        elif check:
                            bluecoat = checkBluecoat(c0)
                            print("[+] {} is categorized as: {}".format(c0, bluecoat))
                            ibmxforce = checkIBMxForce(c0)
                            print("[+] {} is categorized as: {}".format(c0, ibmxforce))
                            # Sleep to avoid captchas
                            time.sleep(random.randrange(10, 20))
                        else:
                            bluecoat = "skipped"
                            ibmxforce = "skipped"
                        # Append parsed domain data to the list
                        data.append([c0, c3, c4, available, bluecoat, ibmxforce])

    # Retrieve the most recent expired/deleted domain results
    else:
        print('[*] Fetching the {} most recent expired or deleted domains...'.format(maxresults))

        # Generate the list of URLs to query for expired/deleted domains; each query returns 25 results per page
        urls = []
        for i in range(0, maxresults // 4, 25):
            urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}'.format(i))
            urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&o=changes&r=d'.format(i))
            urls.append('https://www.expireddomains.net/deleted-net-domains/?start={}&o=changes&r=d'.format(i))
            urls.append('https://www.expireddomains.net/deleted-org-domains/?start={}&o=changes&r=d'.format(i))
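
        # With the default maxresults=100 this yields one page (start=0) per feed
        # (4 feeds x 25 results per page = 100 domains), producing URLs such as:
        #   https://www.expireddomains.net/deleted-com-domains/?start=0&o=changes&r=d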

        for url in urls:
            print("[*] {}".format(url))
            expireddomains = s.get(url, headers=headers, verify=False).text

            # Turn the HTML into a Beautiful Soup object
            soup = BeautifulSoup(expireddomains, 'lxml')
            table = soup.find("table")

            for row in table.findAll('tr')[1:]:
                # Alternative way to extract the domain name:
                # domain = row.find('td').find('a').text
                cells = row.findAll("td")
                if len(cells) >= 1:
                    c0 = cells[0].find(text=True)    # domain
                    c1 = cells[1].find(text=True)    # bl
                    c2 = cells[2].find(text=True)    # domainpop
                    c3 = cells[3].find(text=True)    # birth
                    c4 = cells[4].find(text=True)    # entries
                    c5 = cells[5].find(text=True)    # similarweb
                    c6 = cells[6].find(text=True)    # similarweb country code
                    c7 = cells[7].find(text=True)    # moz
                    c8 = cells[8].find(text=True)    # status com
                    c9 = cells[9].find(text=True)    # status net
                    c10 = cells[10].find(text=True)  # status org
                    c11 = cells[11].find(text=True)  # status de
                    c12 = cells[12].find(text=True)  # tld registered
                    c13 = cells[13].find(text=True)  # changes
                    c14 = cells[14].find(text=True)  # whois
                    # Expired Domains results have an additional 'Availability' column that breaks parsing of "deleted" domains
                    # c15 = cells[15].find(text=True) # related links

                    available = ''
                    if c8 == "available":
                        available += ".com "
                    if c9 == "available":
                        available += ".net "
                    if c10 == "available":
                        available += ".org "
                    if c11 == "available":
                        available += ".de "

                    # Skip additional reputation checks if this domain is already categorized as malicious
                    if c0 in maldomains_list:
                        print("[-] Skipping {} - Identified as known malware domain".format(c0))
                    else:
                        bluecoat = ''
                        ibmxforce = ''
                        if c3 == '-':
                            bluecoat = 'ignored'
                            ibmxforce = 'ignored'
                        elif check:
                            bluecoat = checkBluecoat(c0)
                            print("[+] {} is categorized as: {}".format(c0, bluecoat))
                            ibmxforce = checkIBMxForce(c0)
                            print("[+] {} is categorized as: {}".format(c0, ibmxforce))
                            # Sleep to avoid captchas
                            time.sleep(random.randrange(10, 20))
                        else:
                            bluecoat = "skipped"
                            ibmxforce = "skipped"
                        # Append parsed domain data to the list
                        data.append([c0, c3, c4, available, bluecoat, ibmxforce])

    # Sort the domain list by column index 1 (Birth Year), newest first
    sortedData = sorted(data, key=lambda x: x[1], reverse=True)

    # Set the header explicitly so Texttable doesn't consume the first data row as a header
    header = ['Domain', 'Birth', '#', 'TLDs', 'BC', 'IBM']
    t.header(header)
    t.add_rows(sortedData, header=False)

    # Build HTML Table
    html = ''
    htmlHeader = '<html><head><title>Expired Domain List</title></head>'
    htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
    htmlTableHeader = '''
        <table border="1" align="center">
        <tr>
        <th>Domain</th>
        <th>Birth</th>
        <th>Entries</th>
        <th>TLDs Available</th>
        <th>Bluecoat</th>
        <th>Categorization</th>
        <th>IBM-xForce</th>
        <th>Categorization</th>
        <th>WatchGuard</th>
        <th>Namecheap</th>
        <th>Archive.org</th>
        </tr>
        '''
    htmlTableBody = ''
    htmlTableFooter = '</table>'
    htmlFooter = '</body></html>'

    # Build HTML table contents
    for i in sortedData:
        htmlTableBody += '<tr>'
        htmlTableBody += '<td>{}</td>'.format(i[0])  # Domain
        htmlTableBody += '<td>{}</td>'.format(i[1])  # Birth
        htmlTableBody += '<td>{}</td>'.format(i[2])  # Entries
        htmlTableBody += '<td>{}</td>'.format(i[3])  # TLDs
        htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/sitereview.jsp#/?search={}" target="_blank">Bluecoat</a></td>'.format(i[0])  # Bluecoat
        htmlTableBody += '<td>{}</td>'.format(i[4])  # Bluecoat Categorization
        htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">IBM-xForce</a></td>'.format(i[0])  # IBM xForce
        htmlTableBody += '<td>{}</td>'.format(i[5])  # IBM x-Force Categorization
        htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0])  # Borderware WatchGuard
        htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0])  # Namecheap
        htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0])  # Archive.org
        htmlTableBody += '</tr>'

    html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter

    logfilename = "{}_domainreport.html".format(timestamp)
    log = open(logfilename, 'w')
    log.write(html)
    log.close()

    print("\n[*] Search complete")
    print("[*] Log written to {}\n".format(logfilename))

    print(t.draw())