domainhunter.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. #!/usr/bin/env python
  2. ## Title: domainhunter.py
  3. ## Author: Joe Vest and Andrew Chiles
  4. ## Description: Checks expired domains, bluecoat categorization, and Archive.org history to determine
  5. ## good candidates for phishing and C2 domain names
  6. # To-do:
  7. # Add reputation categorizations to identify desirable vs undesirable domains
  8. # Code cleanup/optimization
  9. # Add Authenticated "Members-Only" option to download CSV/txt (https://member.expireddomains.net/domains/expiredcom/)
  10. import time
  11. import random
  12. import argparse
  13. import json
  14. ## Functions
  15. def checkBluecoat(domain):
  16. try:
  17. url = 'https://sitereview.bluecoat.com/rest/categorization'
  18. postData = {"url":domain} # HTTP POST Parameters
  19. headers = {'User-Agent':useragent,
  20. 'X-Requested-With':'XMLHttpRequest',
  21. 'Referer':'https://sitereview.bluecoat.com/sitereview.jsp'}
  22. print('[*] BlueCoat Check: {}'.format(domain))
  23. response = s.post(url,headers=headers,data=postData,verify=False)
  24. responseJson = json.loads(response.text)
  25. if 'errorType' in responseJson:
  26. a = responseJson['errorType']
  27. else:
  28. soupA = BeautifulSoup(responseJson['categorization'], 'lxml')
  29. a = soupA.find("a").text
  30. # Print notice if CAPTCHAs are blocking accurate results
  31. if a == 'captcha':
  32. print('[-] Error: Blue Coat CAPTCHA received. Change your IP or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')
  33. #raw_input('[*] Press Enter to continue...')
  34. return a
  35. except:
  36. print('[-] Error retrieving Bluecoat reputation!')
  37. return "-"
  38. def checkIBMxForce(domain):
  39. try:
  40. url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
  41. headers = {'User-Agent':useragent,
  42. 'Accept':'application/json, text/plain, */*',
  43. 'x-ui':'XFE',
  44. 'Origin':url,
  45. 'Referer':url}
  46. print('[*] IBM xForce Check: {}'.format(domain))
  47. url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
  48. response = s.get(url,headers=headers,verify=False)
  49. responseJson = json.loads(response.text)
  50. if 'error' in responseJson:
  51. a = responseJson['error']
  52. else:
  53. a = responseJson["result"]['cats']
  54. return a
  55. except:
  56. print('[-] Error retrieving IBM x-Force reputation!')
  57. return "-"
  58. def downloadMalwareDomains():
  59. url = malwaredomains
  60. response = s.get(url,headers=headers,verify=False)
  61. responseText = response.text
  62. if response.status_code == 200:
  63. return responseText
  64. else:
  65. print("Error reaching:{} Status: {}").format(url, response.status_code)
  66. ## MAIN
  67. if __name__ == "__main__":
  68. try:
  69. import requests
  70. from bs4 import BeautifulSoup
  71. from texttable import Texttable
  72. except Exception as e:
  73. print("Expired Domains Reputation Check")
  74. print("[-] Missing dependencies: {}".format(str(e)))
  75. print("[*] Install required dependencies by running `pip install -r requirements.txt`")
  76. quit(0)
  77. parser = argparse.ArgumentParser(description='Checks expired domains, bluecoat categorization, and Archive.org history to determine good candidates for C2 and phishing domains')
  78. parser.add_argument('-q','--query', help='Optional keyword used to refine search results', required=False, type=str)
  79. parser.add_argument('-c','--check', help='Perform slow reputation checks', required=False, default=False, action='store_true')
  80. parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains (min. 100)', required=False, type=int, default=100)
  81. parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, type=int, default=400)
  82. #parser.add_argument('-f','--file', help='Input file containing potential domain names to check (1 per line)', required=False, type=str)
  83. args = parser.parse_args()
  84. ## Variables
  85. query = False
  86. if args.query:
  87. query = args.query
  88. check = args.check
  89. maxresults = args.maxresults
  90. if maxresults < 100:
  91. maxresults = 100
  92. maxwidth=args.maxwidth
  93. # TODO: Add Input file support
  94. #inputfile = False
  95. #if args.file:
  96. # inputfile = args.file
  97. t = Texttable(max_width=maxwidth)
  98. malwaredomains = 'http://mirror1.malwaredomains.com/files/justdomains'
  99. expireddomainsqueryurl = 'https://www.expireddomains.net/domain-name-search'
  100. timestamp = time.strftime("%Y%m%d_%H%M%S")
  101. useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
  102. headers = {'User-Agent':useragent}
  103. requests.packages.urllib3.disable_warnings()
  104. # HTTP Session container, used to manage cookies, session tokens and other session information
  105. s = requests.Session()
  106. data = []
  107. title = '''
  108. ____ ___ __ __ _ ___ _ _ _ _ _ _ _ _ _____ _____ ____
  109. | _ \ / _ \| \/ | / \ |_ _| \ | | | | | | | | | \ | |_ _| ____| _ \
  110. | | | | | | | |\/| | / _ \ | || \| | | |_| | | | | \| | | | | _| | |_) |
  111. | |_| | |_| | | | |/ ___ \ | || |\ | | _ | |_| | |\ | | | | |___| _ <
  112. |____/ \___/|_| |_/_/ \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\ '''
  113. print(title)
  114. print("")
  115. print("Expired Domains Reputation Checker")
  116. print("")
  117. print("DISCLAIMER:")
  118. print("This is for educational purposes only!")
  119. disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
  120. The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
  121. If you plan to use this content for illegal purpose, don't. Have a nice day :)'''
  122. print(disclaimer)
  123. print("")
  124. print("********************************************")
  125. print("Start Time: {}".format(timestamp))
  126. print("TextTable Column Width: {}".format(str(maxwidth)))
  127. print("Checking Reputation: {}".format(str(check)))
  128. print("Number Domains Checked: {}".format(maxresults))
  129. print("********************************************")
  130. runtime = 0
  131. if check:
  132. runtime = (maxresults * 20) / 60
  133. else:
  134. runtime = maxresults * .15 / 60
  135. print("Estimated Max Run Time: {} minutes".format(int(runtime)))
  136. print("")
  137. print('[*] Downloading malware domain list from {}'.format(malwaredomains))
  138. maldomains = downloadMalwareDomains()
  139. maldomains_list = maldomains.split("\n")
  140. # Create an initial session
  141. domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False)
  142. # Generate list of URLs to query for expired/deleted domains, queries return 25 results per page
  143. urls = []
  144. # Use the keyword string to narrow domain search if provided
  145. if query:
  146. print('[*] Fetching expired or deleted domains containing "{}"'.format(query))
  147. for i in range (0,maxresults,25):
  148. if i == 0:
  149. urls.append("{}/?q={}".format(expireddomainsqueryurl,query))
  150. headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(query)
  151. else:
  152. urls.append("{}/?start={}&q={}".format(expireddomainsqueryurl,i,query))
  153. headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),query)
  154. else:
  155. print('[*] Fetching expired or deleted domains...')
  156. for i in range (0,(maxresults),25):
  157. urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&o=changed&r=a'.format(i))
  158. urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&o=changed&r=a'.format(i))
  159. urls.append('https://www.expireddomains.net/deleted-net-domains/?start={}&o=changed&r=a'.format(i))
  160. urls.append('https://www.expireddomains.net/deleted-org-domains/?start={}&o=changed&r=a'.format(i))
  161. for url in urls:
  162. print("[*] {}".format(url))
  163. # Annoyingly when querying specific keywords the expireddomains.net site requires additional cookies which
  164. # are set in JavaScript and not recognized by Requests so we add them here manually.
  165. # May not be needed, but the _pk_id.10.dd0a cookie only requires a single . to be successful
  166. # In order to somewhat match a real cookie, but still be different, random integers are introduced
  167. r1 = random.randint(100000,999999)
  168. r2 = random.randint(100000,999999)
  169. r3 = random.randint(100000,999999)
  170. pk_str = '843f8d071e27aa52' + '.1496' + str(r1) + '.2.1496' + str(r2) + '.1496' + str(r3)
  171. jar = requests.cookies.RequestsCookieJar()
  172. jar.set('_pk_id.10.dd0a', '843f8d071e27aa52.1496597944.2.1496602069.1496601572.', domain='expireddomains.net', path='/')
  173. jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
  174. domainrequest = s.get(url,headers=headers,verify=False,cookies=jar)
  175. domains = domainrequest.text
  176. # Turn the HTML into a Beautiful Soup object
  177. soup = BeautifulSoup(domains, 'lxml')
  178. table = soup.find("table")
  179. try:
  180. for row in table.findAll('tr')[1:]:
  181. # Alternative way to extract domain name
  182. # domain = row.find('td').find('a').text
  183. cells = row.findAll("td")
  184. if len(cells) >= 1:
  185. output = ""
  186. if query:
  187. c0 = row.find('td').find('a').text # domain
  188. c1 = cells[1].find(text=True) # bl
  189. c2 = cells[2].find(text=True) # domainpop
  190. c3 = cells[3].find(text=True) # birth
  191. c4 = cells[4].find(text=True) # entries
  192. c5 = cells[5].find(text=True) # similarweb
  193. c6 = cells[6].find(text=True) # similarweb country code
  194. c7 = cells[7].find(text=True) # moz
  195. c8 = cells[8].find(text=True) # status com
  196. c9 = cells[9].find(text=True) # status net
  197. c10 = cells[10].find(text=True) # status org
  198. c11 = cells[11].find(text=True) # status de
  199. c12 = cells[12].find(text=True) # tld registered
  200. c13 = cells[13].find(text=True) # monthly searches
  201. c14 = cells[14].find(text=True) # adwords competition
  202. c15 = cells[15].find(text=True) # list
  203. c16 = cells[16].find(text=True) # status
  204. c17 = cells[17].find(text=True) # related links
  205. else:
  206. c0 = cells[0].find(text=True) # domain
  207. c1 = cells[1].find(text=True) # bl
  208. c2 = cells[2].find(text=True) # domainpop
  209. c3 = cells[3].find(text=True) # birth
  210. c4 = cells[4].find(text=True) # entries
  211. c5 = cells[5].find(text=True) # similarweb
  212. c6 = cells[6].find(text=True) # similarweb country code
  213. c7 = cells[7].find(text=True) # moz
  214. c8 = cells[8].find(text=True) # status com
  215. c9 = cells[9].find(text=True) # status net
  216. c10 = cells[10].find(text=True) # status org
  217. c11 = cells[11].find(text=True) # status de
  218. c12 = cells[12].find(text=True) # tld registered
  219. c13 = cells[13].find(text=True) # changes
  220. c14 = cells[14].find(text=True) # whois
  221. c15 = "" # not used
  222. c16 = "" # not used
  223. c17 = "" # not used
  224. # Expired Domains results have an additional 'Availability' column that breaks parsing "deleted" domains
  225. #c15 = cells[15].find(text=True) # related links
  226. available = ''
  227. if c8 == "available":
  228. available += ".com "
  229. if c9 == "available":
  230. available += ".net "
  231. if c10 == "available":
  232. available += ".org "
  233. if c11 == "available":
  234. available += ".de "
  235. status = ""
  236. if c16:
  237. status = c16
  238. # Skip additional reputation checks if this domain is already categorized as malicious
  239. if c0 in maldomains_list:
  240. print("[-] Skipping {} - Identified as known malware domain").format(c0)
  241. else:
  242. bluecoat = ''
  243. ibmxforce = ''
  244. if c3 == '-':
  245. bluecoat = 'ignored'
  246. ibmxforce = 'ignored'
  247. elif check == True:
  248. bluecoat = checkBluecoat(c0)
  249. print("[+] {} is categorized as: {}".format(c0, bluecoat))
  250. ibmxforce = checkIBMxForce(c0)
  251. print("[+] {} is categorized as: {}".format(c0, ibmxforce))
  252. # Sleep to avoid captchas
  253. time.sleep(random.randrange(10,20))
  254. else:
  255. bluecoat = "skipped"
  256. ibmxforce = "skipped"
  257. # Append parsed domain data to list
  258. data.append([c0,c3,c4,available,status,bluecoat,ibmxforce])
  259. except:
  260. print("[-] Error: No results found on this page!")
  261. # TODO: Add support of input file
  262. # Retrieve the most recent expired/deleted domain results
  263. # elif inputfile:
  264. # print('[*] Fetching domain reputation from file: {}').format(inputfile)
  265. # # read in file contents to list
  266. # try:
  267. # domains = [line.rstrip('\r\n') for line in open(inputfile, "r")]
  268. # except IOError:
  269. # print '[-] Error: "{}" does not appear to exist.'.format(inputfile)
  270. # exit()
  271. # print('[*] Domains loaded: {}').format(len(domains))
  272. # for domain in domains:
  273. # if domain in maldomains_list:
  274. # print("[-] Skipping {} - Identified as known malware domain").format(domain)
  275. # else:
  276. # bluecoat = ''
  277. # ibmxforce = ''
  278. # bluecoat = checkBluecoat(domain)
  279. # print "[+] {} is categorized as: {}".format(domain, bluecoat)
  280. # ibmxforce = checkIBMxForce(domain)
  281. # print "[+] {} is categorized as: {}".format(domain, ibmxforce)
  282. # # Sleep to avoid captchas
  283. # time.sleep(random.randrange(10,20))
  284. # data.append([domain,'-','-','-',bluecoat,ibmxforce])
  285. # Sort domain list by column 2 (Birth Year)
  286. sortedData = sorted(data, key=lambda x: x[1], reverse=True)
  287. t.add_rows(sortedData)
  288. header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BC', 'IBM']
  289. t.header(header)
  290. # Build HTML Table
  291. html = ''
  292. htmlHeader = '<html><head><title>Expired Domain List</title></head>'
  293. htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
  294. htmlTableHeader = '''
  295. <table border="1" align="center">
  296. <th>Domain</th>
  297. <th>Birth</th>
  298. <th>Entries</th>
  299. <th>TLDs Available</th>
  300. <th>Status</th>
  301. <th>Bluecoat</th>
  302. <th>Categorization</th>
  303. <th>IBM-xForce</th>
  304. <th>Categorization</th>
  305. <th>WatchGuard</th>
  306. <th>Namecheap</th>
  307. <th>Archive.org</th>
  308. '''
  309. htmlTableBody = ''
  310. htmlTableFooter = '</table>'
  311. htmlFooter = '</body></html>'
  312. # Build HTML table contents
  313. for i in sortedData:
  314. htmlTableBody += '<tr>'
  315. htmlTableBody += '<td>{}</td>'.format(i[0]) # Domain
  316. htmlTableBody += '<td>{}</td>'.format(i[1]) # Birth
  317. htmlTableBody += '<td>{}</td>'.format(i[2]) # Entries
  318. htmlTableBody += '<td>{}</td>'.format(i[3]) # TLDs
  319. htmlTableBody += '<td>{}</td>'.format(i[4]) # Status
  320. htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/sitereview.jsp#/?search={}" target="_blank">Bluecoat</a></td>'.format(i[0]) # Bluecoat
  321. htmlTableBody += '<td>{}</td>'.format(i[5]) # Bluecoat Categorization
  322. htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">IBM-xForce</a></td>'.format(i[0]) # IBM xForce
  323. htmlTableBody += '<td>{}</td>'.format(i[6]) # IBM x-Force Categorization
  324. htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0]) # Borderware WatchGuard
  325. htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0]) # Namecheap
  326. htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0]) # Archive.org
  327. htmlTableBody += '</tr>'
  328. html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter
  329. logfilename = "{}_domainreport.html".format(timestamp)
  330. log = open(logfilename,'w')
  331. log.write(html)
  332. log.close
  333. print("\n[*] Search complete")
  334. print("[*] Log written to {}\n".format(logfilename))
  335. print(t.draw())