#!/usr/bin/env python

## Title:       domainhunter.py
## Author:      Joe Vest and Andrew Chiles
## Description: Checks expired domains, bluecoat categorization, and Archive.org history to determine
##              good candidates for phishing and C2 domain names

# To-do:
# Add reputation categorizations to identify desirable vs undesirable domains
# Code cleanup/optimization
# Add Authenticated "Members-Only" option to download CSV/txt (https://member.expireddomains.net/domains/expiredcom/)
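
# Example invocations (using the argparse flags defined under MAIN below; the
# report filename shown is illustrative, as it embeds the run's timestamp):
#   python domainhunter.py                   -> check the 100 most recent expired/deleted domains
#   python domainhunter.py -q <keyword> -c   -> keyword search with (slow) reputation checks
#   python domainhunter.py -r 200 -w 300     -> 200 results, 300-character wide text table
# Each run writes an HTML report such as 20170601_120000_domainreport.html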

import time
import random
import argparse
import json

## Functions
def checkBluecoat(domain):
    """Check the Blue Coat Site Review categorization for a domain."""
    try:
        url = 'https://sitereview.bluecoat.com/rest/categorization'
        postData = {"url":domain} # HTTP POST Parameters
        headers = {'User-Agent':useragent,
                   'X-Requested-With':'XMLHttpRequest',
                   'Referer':'https://sitereview.bluecoat.com/sitereview.jsp'}

        print('[*] BlueCoat Check: {}'.format(domain))
        response = s.post(url,headers=headers,data=postData,verify=False)
        responseJson = json.loads(response.text)

        if 'errorType' in responseJson:
            a = responseJson['errorType']
        else:
            soupA = BeautifulSoup(responseJson['categorization'], 'lxml')
            a = soupA.find("a").text

        # Print notice if CAPTCHAs are blocking accurate results
        if a == 'captcha':
            print('[-] Error: Blue Coat CAPTCHA received. Change your IP or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')
            #raw_input('[*] Press Enter to continue...')
        return a
    except:
        print('[-] Error retrieving Bluecoat reputation!')
        return "-"

def checkIBMxForce(domain):
    """Check the IBM X-Force Exchange categorization for a domain."""
    try:
        url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
        headers = {'User-Agent':useragent,
                   'Accept':'application/json, text/plain, */*',
                   'x-ui':'XFE',
                   'Origin':url,
                   'Referer':url}

        print('[*] IBM xForce Check: {}'.format(domain))
        url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
        response = s.get(url,headers=headers,verify=False)
        responseJson = json.loads(response.text)

        if 'error' in responseJson:
            a = responseJson['error']
        else:
            a = responseJson["result"]['cats']
        return a
    except:
        print('[-] Error retrieving IBM x-Force reputation!')
        return "-"

def downloadMalwareDomains():
    """Download the malwaredomains.com blocklist and return it as raw text."""
    url = malwaredomains
    response = s.get(url,headers=headers,verify=False)
    responseText = response.text
    if response.status_code == 200:
        return responseText
    else:
        print("Error reaching: {} Status: {}".format(url, response.status_code))
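
# Format note: the justdomains blocklist is plain text with one domain per line,
# e.g. (hypothetical entries):
#   malicious-example1.com
#   malicious-example2.net
# which is why the download is later split("\n") into a simple membership list.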

## MAIN
if __name__ == "__main__":
    try:
        import requests
        from bs4 import BeautifulSoup
        from texttable import Texttable
    except Exception as e:
        print("Expired Domains Reputation Check")
        print("[-] Missing dependencies: {}".format(str(e)))
        print("[*] Install required dependencies by running `pip install -r requirements.txt`")
        quit(0)

    parser = argparse.ArgumentParser(description='Checks expired domains, bluecoat categorization, and Archive.org history to determine good candidates for C2 and phishing domains')
    parser.add_argument('-q','--query', help='Optional keyword used to refine search results', required=False, type=str)
    parser.add_argument('-c','--check', help='Perform slow reputation checks', required=False, default=False, action='store_true')
    parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains (min. 100)', required=False, type=int, default=100)
    parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, type=int, default=400)
    #parser.add_argument('-f','--file', help='Input file containing potential domain names to check (1 per line)', required=False, type=str)
    args = parser.parse_args()

    ## Variables
    query = False
    if args.query:
        query = args.query

    check = args.check

    maxresults = args.maxresults
    if maxresults < 100:
        maxresults = 100

    maxwidth = args.maxwidth

    # TODO: Add Input file support
    #inputfile = False
    #if args.file:
    #    inputfile = args.file

    t = Texttable(max_width=maxwidth)
    malwaredomains = 'http://mirror1.malwaredomains.com/files/justdomains'
    expireddomainsqueryurl = 'https://www.expireddomains.net/domain-name-search'
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
    headers = {'User-Agent':useragent}

    # Suppress certificate warnings, since every request is made with verify=False
    requests.packages.urllib3.disable_warnings()

    # HTTP Session container, used to manage cookies, session tokens and other session information
    s = requests.Session()
    data = []

    title = '''
 ____   ___  __  __    _    ___ _   _   _   _ _   _ _   _ _____ _____ ____
|  _ \ / _ \|  \/  |  / \  |_ _| \ | | | | | | | | | \ | |_   _| ____|  _ \\
| | | | | | | |\/| | / _ \  | ||  \| | | |_| | | | |  \| | | | |  _| | |_) |
| |_| | |_| | |  | |/ ___ \ | || |\  | |  _  | |_| | |\  | | | | |___|  _ <
|____/ \___/|_|  |_/_/   \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\ '''

    print(title)
    print("")
    print("Expired Domains Reputation Checker")
    print("")
    print("DISCLAIMER:")
    print("This is for educational purposes only!")
    disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
If you plan to use this content for illegal purpose, don't. Have a nice day :)'''
    print(disclaimer)
    print("")
    print("********************************************")
    print("Start Time: {}".format(timestamp))
    print("TextTable Column Width: {}".format(str(maxwidth)))
    print("Checking Reputation: {}".format(str(check)))
    print("Number Domains Checked: {}".format(maxresults))
    print("********************************************")
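
    # Rough runtime estimate: with -c each domain costs ~20 seconds (two
    # reputation lookups plus the random 10-20s anti-CAPTCHA sleep below);
    # without checks, assume ~0.15 seconds of scraping time per domain.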
    runtime = 0
    if check:
        runtime = (maxresults * 20) / 60
    else:
        runtime = maxresults * .15 / 60

    print("Estimated Max Run Time: {} minutes".format(int(runtime)))
    print("")

    print('[*] Downloading malware domain list from {}'.format(malwaredomains))
    maldomains = downloadMalwareDomains()
    maldomains_list = maldomains.split("\n")

    # Create an initial session
    # Generic Proxy support
    # TODO: add as a parameter
    proxies = {
        'http': 'http://127.0.0.1:8080',
        'https': 'http://127.0.0.1:8080',
    }

    domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False)
    #domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)
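
    # Untested sketch of how the proxy TODO above might be parameterized; the
    # '-p/--proxy' flag is hypothetical and would need to be added to the
    # argparse setup near the top of MAIN:
    #   parser.add_argument('-p','--proxy', help='HTTP(S) proxy, e.g. http://127.0.0.1:8080', required=False, type=str)
    #   proxies = {'http': args.proxy, 'https': args.proxy} if args.proxy else None
    #   domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)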

    # Generate list of URLs to query for expired/deleted domains, queries return 25 results per page
    urls = []

    # Use the keyword string to narrow domain search if provided
    if query:
        print('[*] Fetching expired or deleted domains containing "{}"'.format(query))
        for i in range(0,maxresults,25):
            if i == 0:
                urls.append("{}/?q={}".format(expireddomainsqueryurl,query))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(query)
            else:
                urls.append("{}/?start={}&q={}".format(expireddomainsqueryurl,i,query))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),query)
    else:
        print('[*] Fetching expired or deleted domains...')
        for i in range(0,maxresults,25):
            urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-net-domains/?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-org-domains/?start={}&o=changed&r=a'.format(i))

    for url in urls:
        print("[*] {}".format(url))

        # Annoyingly when querying specific keywords the expireddomains.net site requires additional cookies which
        # are set in JavaScript and not recognized by Requests so we add them here manually.
        # May not be needed, but the _pk_id.10.dd0a cookie only requires a single . to be successful
        # In order to somewhat match a real cookie, but still be different, random integers are introduced
        r1 = random.randint(100000,999999)

        # Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
        pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)

        jar = requests.cookies.RequestsCookieJar()
        #jar.set('_pk_id.10.dd0a', '843f8d071e27aa52.1496597944.2.1496602069.1496601572.', domain='expireddomains.net', path='/')
        jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
        jar.set('_pk_id.10.dd0a', pk_str, domain='expireddomains.net', path='/')

        domainrequest = s.get(url,headers=headers,verify=False,cookies=jar)
        #domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)

        domains = domainrequest.text

        # Turn the HTML into a Beautiful Soup object
        soup = BeautifulSoup(domains, 'lxml')
        table = soup.find("table")

        try:
            for row in table.findAll('tr')[1:]:
                # Alternative way to extract domain name
                # domain = row.find('td').find('a').text
                cells = row.findAll("td")
                if len(cells) >= 1:
                    output = ""
                    if query:
                        c0 = row.find('td').find('a').text  # domain
                        c1 = cells[1].find(text=True)       # bl
                        c2 = cells[2].find(text=True)       # domainpop
                        c3 = cells[3].find(text=True)       # birth
                        c4 = cells[4].find(text=True)       # entries
                        c5 = cells[5].find(text=True)       # similarweb
                        c6 = cells[6].find(text=True)       # similarweb country code
                        c7 = cells[7].find(text=True)       # moz
                        c8 = cells[8].find(text=True)       # status com
                        c9 = cells[9].find(text=True)       # status net
                        c10 = cells[10].find(text=True)     # status org
                        c11 = cells[11].find(text=True)     # status de
                        c12 = cells[12].find(text=True)     # tld registered
                        c13 = cells[13].find(text=True)     # monthly searches
                        c14 = cells[14].find(text=True)     # adwords competition
                        c15 = cells[15].find(text=True)     # list
                        c16 = cells[16].find(text=True)     # status
                        c17 = cells[17].find(text=True)     # related links
                    else:
                        c0 = cells[0].find(text=True)       # domain
                        c1 = cells[1].find(text=True)       # bl
                        c2 = cells[2].find(text=True)       # domainpop
                        c3 = cells[3].find(text=True)       # birth
                        c4 = cells[4].find(text=True)       # entries
                        c5 = cells[5].find(text=True)       # similarweb
                        c6 = cells[6].find(text=True)       # similarweb country code
                        c7 = cells[7].find(text=True)       # moz
                        c8 = cells[8].find(text=True)       # status com
                        c9 = cells[9].find(text=True)       # status net
                        c10 = cells[10].find(text=True)     # status org
                        c11 = cells[11].find(text=True)     # status de
                        c12 = cells[12].find(text=True)     # tld registered
                        c13 = cells[13].find(text=True)     # changes
                        c14 = cells[14].find(text=True)     # whois
                        c15 = ""  # not used
                        c16 = ""  # not used
                        c17 = ""  # not used
                        # Expired Domains results have an additional 'Availability' column that breaks parsing "deleted" domains
                        #c15 = cells[15].find(text=True)  # related links

                    available = ''
                    if c8 == "available":
                        available += ".com "
                    if c9 == "available":
                        available += ".net "
                    if c10 == "available":
                        available += ".org "
                    if c11 == "available":
                        available += ".de "

                    status = ""
                    if c16:
                        status = c16

                    # Skip additional reputation checks if this domain is already categorized as malicious
                    if c0 in maldomains_list:
                        print("[-] Skipping {} - Identified as known malware domain".format(c0))
                    else:
                        bluecoat = ''
                        ibmxforce = ''
                        if c3 == '-':
                            bluecoat = 'ignored'
                            ibmxforce = 'ignored'
                        elif check == True:
                            bluecoat = checkBluecoat(c0)
                            print("[+] {} is categorized as: {}".format(c0, bluecoat))
                            ibmxforce = checkIBMxForce(c0)
                            print("[+] {} is categorized as: {}".format(c0, ibmxforce))
                            # Sleep to avoid captchas
                            time.sleep(random.randrange(10,20))
                        else:
                            bluecoat = "skipped"
                            ibmxforce = "skipped"
                        # Append parsed domain data to list
                        data.append([c0,c3,c4,available,status,bluecoat,ibmxforce])
        except:
            print("[-] Error: No results found on this page!")

    # TODO: Add support of input file
    # Retrieve the most recent expired/deleted domain results
    # elif inputfile:
    #     print('[*] Fetching domain reputation from file: {}'.format(inputfile))
    #     # read in file contents to list
    #     try:
    #         domains = [line.rstrip('\r\n') for line in open(inputfile, "r")]
    #     except IOError:
    #         print('[-] Error: "{}" does not appear to exist.'.format(inputfile))
    #         exit()
    #     print('[*] Domains loaded: {}'.format(len(domains)))
    #     for domain in domains:
    #         if domain in maldomains_list:
    #             print("[-] Skipping {} - Identified as known malware domain".format(domain))
    #         else:
    #             bluecoat = ''
    #             ibmxforce = ''
    #             bluecoat = checkBluecoat(domain)
    #             print("[+] {} is categorized as: {}".format(domain, bluecoat))
    #             ibmxforce = checkIBMxForce(domain)
    #             print("[+] {} is categorized as: {}".format(domain, ibmxforce))
    #             # Sleep to avoid captchas
    #             time.sleep(random.randrange(10,20))
    #             data.append([domain,'-','-','-',bluecoat,ibmxforce])

    # Sort domain list by column 2 (Birth Year)
    sortedData = sorted(data, key=lambda x: x[1], reverse=True)
    t.add_rows(sortedData)
    header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BC', 'IBM']
    t.header(header)

    # Build HTML Table
    html = ''
    htmlHeader = '<html><head><title>Expired Domain List</title></head>'
    htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
    htmlTableHeader = '''
        <table border="1" align="center">
            <th>Domain</th>
            <th>Birth</th>
            <th>Entries</th>
            <th>TLDs Available</th>
            <th>Status</th>
            <th>Bluecoat</th>
            <th>Categorization</th>
            <th>IBM-xForce</th>
            <th>Categorization</th>
            <th>WatchGuard</th>
            <th>Namecheap</th>
            <th>Archive.org</th>
        '''
    htmlTableBody = ''
    htmlTableFooter = '</table>'
    htmlFooter = '</body></html>'

    # Build HTML table contents
    for i in sortedData:
        htmlTableBody += '<tr>'
        htmlTableBody += '<td>{}</td>'.format(i[0])  # Domain
        htmlTableBody += '<td>{}</td>'.format(i[1])  # Birth
        htmlTableBody += '<td>{}</td>'.format(i[2])  # Entries
        htmlTableBody += '<td>{}</td>'.format(i[3])  # TLDs
        htmlTableBody += '<td>{}</td>'.format(i[4])  # Status
        htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/sitereview.jsp#/?search={}" target="_blank">Bluecoat</a></td>'.format(i[0])  # Bluecoat
        htmlTableBody += '<td>{}</td>'.format(i[5])  # Bluecoat Categorization
        htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">IBM-xForce</a></td>'.format(i[0])  # IBM xForce
        htmlTableBody += '<td>{}</td>'.format(i[6])  # IBM x-Force Categorization
        htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0])  # Borderware WatchGuard
        htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0])  # Namecheap
        htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0])  # Archive.org
        htmlTableBody += '</tr>'

    html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter

    logfilename = "{}_domainreport.html".format(timestamp)
    log = open(logfilename,'w')
    log.write(html)
    log.close()

    print("\n[*] Search complete")
    print("[*] Log written to {}\n".format(logfilename))

    print(t.draw())