#!/usr/bin/env python

## Title:       domainhunter.py
## Author:      Joe Vest and Andrew Chiles
## Description: Checks expired domains, Bluecoat categorization, and Archive.org history to determine
##              good candidates for phishing and C2 domain names

# To-do:
# Add reputation categorizations to identify desirable vs. undesirable domains
# Code cleanup/optimization
# Add authenticated "Members-Only" option to download CSV/txt (https://member.expireddomains.net/domains/expiredcom/)

import time
import random
import argparse
import json

## Functions
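# checkBluecoat() POSTs the candidate domain to the Blue Coat Site Review
# categorization endpoint and pulls the category name out of the small HTML
# fragment the service returns. It returns that category string, the service's
# error type, or "-" if the lookup fails outright.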
def checkBluecoat(domain):
    try:
        url = 'https://sitereview.bluecoat.com/rest/categorization'
        postData = {"url": domain}  # HTTP POST parameters
        headers = {'User-Agent': useragent,
                   'X-Requested-With': 'XMLHttpRequest',
                   'Referer': 'https://sitereview.bluecoat.com/sitereview.jsp'}

        print('[*] BlueCoat Check: {}'.format(domain))
        response = s.post(url, headers=headers, data=postData, verify=False)
        responseJson = json.loads(response.text)

        if 'errorType' in responseJson:
            a = responseJson['errorType']
        else:
            soupA = BeautifulSoup(responseJson['categorization'], 'lxml')
            a = soupA.find("a").text

        # Print a notice if CAPTCHAs are blocking accurate results
        if a == 'captcha':
            print('[-] Error: Blue Coat CAPTCHA received. Change your IP or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')
            #raw_input('[*] Press Enter to continue...')

        return a
    except Exception:
        print('[-] Error retrieving Bluecoat reputation!')
        return "-"
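# checkIBMxForce() looks the domain up in IBM X-Force Exchange. The headers
# point Origin/Referer at the public exchange UI while the request itself goes
# to the API host, so the traffic resembles a normal browser session.
# Returns the report's 'cats' field, an error string, or "-" on failure.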
def checkIBMxForce(domain):
    try:
        url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
        headers = {'User-Agent': useragent,
                   'Accept': 'application/json, text/plain, */*',
                   'x-ui': 'XFE',
                   'Origin': url,
                   'Referer': url}

        print('[*] IBM xForce Check: {}'.format(domain))
        url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
        response = s.get(url, headers=headers, verify=False)
        responseJson = json.loads(response.text)

        if 'error' in responseJson:
            a = responseJson['error']
        else:
            a = responseJson["result"]['cats']

        return a
    except Exception:
        print('[-] Error retrieving IBM x-Force reputation!')
        return "-"
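# downloadMalwareDomains() fetches the malwaredomains.com blocklist as plain
# text, one domain per line; parsed results are checked against it later so
# known malware domains can be skipped.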
def downloadMalwareDomains():
    url = malwaredomains
    response = s.get(url, headers=headers, verify=False)
    responseText = response.text
    if response.status_code == 200:
        return responseText
    else:
        print("Error reaching: {} Status: {}".format(url, response.status_code))
## MAIN
if __name__ == "__main__":
    try:
        import requests
        from bs4 import BeautifulSoup
        from texttable import Texttable
    except Exception as e:
        print("Expired Domains Reputation Check")
        print("[-] Missing dependencies: {}".format(str(e)))
        print("[*] Install required dependencies by running `pip install -r requirements.txt`")
        quit(0)
    parser = argparse.ArgumentParser(description='Checks expired domains, Bluecoat categorization, and Archive.org history to determine good candidates for C2 and phishing domains')
    parser.add_argument('-q', '--query', help='Optional keyword used to refine search results', required=False, type=str)
    parser.add_argument('-c', '--check', help='Perform slow reputation checks', required=False, default=False, action='store_true')
    parser.add_argument('-r', '--maxresults', help='Number of results to return when querying latest expired/deleted domains (min. 100)', required=False, type=int, default=100)
    parser.add_argument('-w', '--maxwidth', help='Width of text table', required=False, type=int, default=400)
    #parser.add_argument('-f','--file', help='Input file containing potential domain names to check (1 per line)', required=False, type=str)
    args = parser.parse_args()

    ## Variables
    query = False
    if args.query:
        query = args.query

    check = args.check

    maxresults = args.maxresults
    if maxresults < 100:
        maxresults = 100

    maxwidth = args.maxwidth

    # TODO: Add input file support
    #inputfile = False
    #if args.file:
    #    inputfile = args.file
    t = Texttable(max_width=maxwidth)

    malwaredomains = 'http://mirror1.malwaredomains.com/files/justdomains'
    expireddomainsqueryurl = 'https://www.expireddomains.net/domain-name-search'

    timestamp = time.strftime("%Y%m%d_%H%M%S")

    useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
    headers = {'User-Agent': useragent}

    # Suppress the InsecureRequestWarning noise caused by the verify=False requests below
    requests.packages.urllib3.disable_warnings()

    # HTTP Session container, used to manage cookies, session tokens and other session information
    s = requests.Session()

    data = []
    title = '''
 ____   ___  __  __    _    ___ _   _ _   _ _   _ _   _ _____ _____ ____
|  _ \ / _ \|  \/  |  / \  |_ _| \ | | | | | | | | \ | |_   _| ____|  _ \\
| | | | | | | |\/| | / _ \  | || \| | | |_| | | | |  \| | | | |  _| | |_) |
| |_| | |_| | |  | |/ ___ \ | || |\  | |  _  | |_| | |\  | | | | |___|  _ <
|____/ \___/|_|  |_/_/   \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\\ '''
    print(title)
    print("")
    print("Expired Domains Reputation Checker")
    print("")
    print("DISCLAIMER:")
    print("This is for educational purposes only!")
    disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
If you plan to use this content for illegal purposes, don't. Have a nice day :)'''
    print(disclaimer)
    print("")
    print("********************************************")
    print("Start Time: {}".format(timestamp))
    print("TextTable Column Width: {}".format(str(maxwidth)))
    print("Checking Reputation: {}".format(str(check)))
    print("Number of Domains Checked: {}".format(maxresults))
    print("********************************************")
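    # Rough estimate of total runtime: with reputation checks each domain costs
    # roughly 20 seconds (a 10-20 second anti-CAPTCHA sleep plus request time);
    # without them, only ~0.15 seconds of request overhead per domain.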
    runtime = 0
    if check:
        runtime = (maxresults * 20) / 60
    else:
        runtime = maxresults * .15 / 60

    print("Estimated Max Run Time: {} minutes".format(int(runtime)))
    print("")

    print('[*] Downloading malware domain list from {}'.format(malwaredomains))
    maldomains = downloadMalwareDomains()
    maldomains_list = maldomains.split("\n")
    # Create an initial session
    # Generic proxy support
    # TODO: add as a parameter
    proxies = {
        'http': 'http://127.0.0.1:8080',
        'https': 'http://127.0.0.1:8080',
    }
    domainrequest = s.get("https://www.expireddomains.net", headers=headers, verify=False)
    #domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)
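    # A minimal sketch of how the proxy TODO above could be wired up as a flag
    # (untested; the --proxy option is hypothetical and not currently defined):
    #
    #   parser.add_argument('-p', '--proxy', help='Proxy URL, e.g. http://127.0.0.1:8080', required=False, type=str)
    #   proxies = {'http': args.proxy, 'https': args.proxy} if args.proxy else None
    #   domainrequest = s.get("https://www.expireddomains.net", headers=headers, verify=False, proxies=proxies)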
    # Generate the list of URLs to query for expired/deleted domains; queries return 25 results per page
    urls = []

    # Use the keyword string to narrow the domain search if provided
    if query:
        print('[*] Fetching expired or deleted domains containing "{}"'.format(query))
        for i in range(0, maxresults, 25):
            if i == 0:
                urls.append("{}/?q={}".format(expireddomainsqueryurl, query))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(query)
            else:
                urls.append("{}/?start={}&q={}".format(expireddomainsqueryurl, i, query))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i - 25), query)
    else:
        print('[*] Fetching expired or deleted domains...')
        for i in range(0, maxresults, 25):
            urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-net-domains/?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-org-domains/?start={}&o=changed&r=a'.format(i))
    for url in urls:
        print("[*] {}".format(url))

        # Annoyingly, when querying specific keywords the expireddomains.net site requires additional
        # cookies that are set in JavaScript and not recognized by Requests, so we add them here manually.
        # May not be needed, but the _pk_id.10.dd0a cookie only requires a single . to be successful.
        # To somewhat match a real cookie while still being different, random integers are introduced.
        r1 = random.randint(100000, 999999)
        # Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
        pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)

        jar = requests.cookies.RequestsCookieJar()
        #jar.set('_pk_id.10.dd0a', '843f8d071e27aa52.1496597944.2.1496602069.1496601572.', domain='expireddomains.net', path='/')
        jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
        jar.set('_pk_id.10.dd0a', pk_str, domain='expireddomains.net', path='/')

        domainrequest = s.get(url, headers=headers, verify=False, cookies=jar)
        #domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)

        domains = domainrequest.text

        # Turn the HTML into a Beautiful Soup object
        soup = BeautifulSoup(domains, 'lxml')
        table = soup.find("table")
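        # Walk the results table row by row. Keyword-search pages and the
        # deleted-domain listings lay their columns out differently, so the
        # two branches below index the cells accordingly.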
        try:
            for row in table.findAll('tr')[1:]:
                # Alternative way to extract the domain name:
                # domain = row.find('td').find('a').text
                cells = row.findAll("td")
                if len(cells) >= 1:
                    output = ""
                    if query:
                        c0 = row.find('td').find('a').text  # domain
                        c1 = cells[1].find(text=True)   # bl
                        c2 = cells[2].find(text=True)   # domainpop
                        c3 = cells[3].find(text=True)   # birth
                        c4 = cells[4].find(text=True)   # Archive.org entries
                        c5 = cells[5].find(text=True)   # similarweb
                        c6 = cells[6].find(text=True)   # similarweb country code
                        c7 = cells[7].find(text=True)   # Dmoz.org
                        c8 = cells[8].find(text=True)   # status com
                        c9 = cells[9].find(text=True)   # status net
                        c10 = cells[10].find(text=True) # status org
                        c11 = cells[11].find(text=True) # status de
                        c12 = cells[12].find(text=True) # tld registered
                        c13 = cells[13].find(text=True) # related domains
                        c14 = cells[14].find(text=True) # domain list
                        c15 = cells[15].find(text=True) # status
                        c16 = cells[16].find(text=True) # related links
                    else:
                        c0 = cells[0].find(text=True)   # domain
                        c1 = cells[1].find(text=True)   # bl
                        c2 = cells[2].find(text=True)   # domainpop
                        c3 = cells[3].find(text=True)   # birth
                        c4 = cells[4].find(text=True)   # Archive.org entries
                        c5 = cells[5].find(text=True)   # similarweb
                        c6 = cells[6].find(text=True)   # similarweb country code
                        c7 = cells[7].find(text=True)   # Dmoz.org
                        c8 = cells[8].find(text=True)   # status com
                        c9 = cells[9].find(text=True)   # status net
                        c10 = cells[10].find(text=True) # status org
                        c11 = cells[11].find(text=True) # status de
                        c12 = cells[12].find(text=True) # tld registered
                        c13 = cells[13].find(text=True) # changes
                        c14 = cells[14].find(text=True) # whois
                        c15 = ""  # not used
                        c16 = ""  # not used
                        c17 = ""  # not used
                        # Expired Domains results have an additional 'Availability' column that breaks parsing "deleted" domains
                        #c15 = cells[15].find(text=True) # related links

                    available = ''
                    if c8 == "available":
                        available += ".com "
                    if c9 == "available":
                        available += ".net "
                    if c10 == "available":
                        available += ".org "
                    if c11 == "available":
                        available += ".de "

                    status = ""
                    if c15:
                        status = c15

                    # Skip additional reputation checks if this domain is already categorized as malicious
                    if c0 in maldomains_list:
                        print("[-] Skipping {} - Identified as known malware domain".format(c0))
                    else:
                        bluecoat = ''
                        ibmxforce = ''
                        if c3 == '-':
                            bluecoat = 'ignored'
                            ibmxforce = 'ignored'
                        elif check:
                            bluecoat = checkBluecoat(c0)
                            print("[+] {} is categorized as: {}".format(c0, bluecoat))
                            ibmxforce = checkIBMxForce(c0)
                            print("[+] {} is categorized as: {}".format(c0, ibmxforce))
                            # Sleep to avoid CAPTCHAs
                            time.sleep(random.randrange(10, 20))
                        else:
                            bluecoat = "skipped"
                            ibmxforce = "skipped"
                        # Append parsed domain data to the list
                        data.append([c0, c3, c4, available, status, bluecoat, ibmxforce])
        except Exception as e:
            print(e)
            #print("[-] Error: No results found on this page!")
    # TODO: Add support for an input file
    # Retrieve the most recent expired/deleted domain results
    # elif inputfile:
    #     print('[*] Fetching domain reputation from file: {}'.format(inputfile))
    #     # Read in the file contents to a list
    #     try:
    #         domains = [line.rstrip('\r\n') for line in open(inputfile, "r")]
    #     except IOError:
    #         print('[-] Error: "{}" does not appear to exist.'.format(inputfile))
    #         exit()
    #     print('[*] Domains loaded: {}'.format(len(domains)))
    #     for domain in domains:
    #         if domain in maldomains_list:
    #             print("[-] Skipping {} - Identified as known malware domain".format(domain))
    #         else:
    #             bluecoat = ''
    #             ibmxforce = ''
    #             bluecoat = checkBluecoat(domain)
    #             print("[+] {} is categorized as: {}".format(domain, bluecoat))
    #             ibmxforce = checkIBMxForce(domain)
    #             print("[+] {} is categorized as: {}".format(domain, ibmxforce))
    #             # Sleep to avoid CAPTCHAs
    #             time.sleep(random.randrange(10, 20))
    #             data.append([domain, '-', '-', '-', bluecoat, ibmxforce])
    # Sort the domain list by column 2 (Birth Year)
    sortedData = sorted(data, key=lambda x: x[1], reverse=True)
    t.add_rows(sortedData)
    header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BC', 'IBM']
    t.header(header)
    # Build HTML table
    html = ''
    htmlHeader = '<html><head><title>Expired Domain List</title></head>'
    htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
    htmlTableHeader = '''
        <table border="1" align="center">
            <th>Domain</th>
            <th>Birth</th>
            <th>Entries</th>
            <th>TLDs Available</th>
            <th>Status</th>
            <th>Bluecoat</th>
            <th>Categorization</th>
            <th>IBM-xForce</th>
            <th>Categorization</th>
            <th>WatchGuard</th>
            <th>Namecheap</th>
            <th>Archive.org</th>
        '''
    htmlTableBody = ''
    htmlTableFooter = '</table>'
    htmlFooter = '</body></html>'
    # Build HTML table contents
    for i in sortedData:
        htmlTableBody += '<tr>'
        htmlTableBody += '<td>{}</td>'.format(i[0])  # Domain
        htmlTableBody += '<td>{}</td>'.format(i[1])  # Birth
        htmlTableBody += '<td>{}</td>'.format(i[2])  # Entries
        htmlTableBody += '<td>{}</td>'.format(i[3])  # TLDs
        htmlTableBody += '<td>{}</td>'.format(i[4])  # Status
        htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/sitereview.jsp#/?search={}" target="_blank">Bluecoat</a></td>'.format(i[0])  # Bluecoat
        htmlTableBody += '<td>{}</td>'.format(i[5])  # Bluecoat categorization
        htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">IBM-xForce</a></td>'.format(i[0])  # IBM xForce
        htmlTableBody += '<td>{}</td>'.format(i[6])  # IBM x-Force categorization
        htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0])  # Borderware WatchGuard
        htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0])  # Namecheap
        htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0])  # Archive.org
        htmlTableBody += '</tr>'

    html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter
    logfilename = "{}_domainreport.html".format(timestamp)
    log = open(logfilename, 'w')
    log.write(html)
    log.close()

    print("\n[*] Search complete")
    print("[*] Log written to {}\n".format(logfilename))

    print(t.draw())
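    # Example invocations, using the argparse flags defined above:
    #   python domainhunter.py                    # latest 100 expired/deleted domains, no reputation checks
    #   python domainhunter.py -q mycompany       # narrow results to a keyword
    #   python domainhunter.py -c -r 200 -w 300   # check reputation on 200 results with a 300-char table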