#!/usr/bin/env python

## Title: domainhunter.py
## Author: @joevest and @andrewchiles
## Description: Checks expired domains, reputation/categorization, and Archive.org history to determine
## good candidates for phishing and C2 domain names

# To-do:
# Add reputation categorizations to identify desirable vs. undesirable domains
# Code cleanup/optimization
# Add Authenticated "Members-Only" option to download CSV/txt (https://member.expireddomains.net/domains/expiredcom/)
import time
import random
import argparse
import json

__version__ = "20180407"
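
# Example invocations (flags as defined by the argument parser in the MAIN
# section below; illustrative, not exhaustive):
#   python domainhunter.py                 # latest expired/deleted domains, no reputation checks
#   python domainhunter.py -q bank -c      # keyword search with slow Bluecoat/xForce checks
#   python domainhunter.py -s example.com  # quick reputation check of a single domain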
## Functions

def checkBluecoat(domain):
    try:
        url = 'https://sitereview.bluecoat.com/resource/lookup'
        postData = {'url':domain,'captcha':''} # HTTP POST parameters
        headers = {'User-Agent':useragent,
                   'Content-Type':'application/json; charset=UTF-8',
                   'Referer':'https://sitereview.bluecoat.com/lookup'}

        print('[*] BlueCoat Check: {}'.format(domain))
        response = s.post(url,headers=headers,json=postData,verify=False)
        responseJSON = json.loads(response.text)

        if 'errorType' in responseJSON:
            a = responseJSON['errorType']
        else:
            a = responseJSON['categorization'][0]['name']

        # Print notice if CAPTCHAs are blocking accurate results
        # if a == 'captcha':
        #     print('[-] Error: Blue Coat CAPTCHA received. Change your IP or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')
        #     #raw_input('[*] Press Enter to continue...')

        return a

    except Exception:
        print('[-] Error retrieving Bluecoat reputation!')
        return "-"
def checkIBMxForce(domain):
    try:
        url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
        headers = {'User-Agent':useragent,
                   'Accept':'application/json, text/plain, */*',
                   'x-ui':'XFE',
                   'Origin':url,
                   'Referer':url}

        print('[*] IBM xForce Check: {}'.format(domain))
        url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
        response = s.get(url,headers=headers,verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = responseJSON['error']
        else:
            a = str(responseJSON["result"]['cats'])

        return a

    except Exception:
        print('[-] Error retrieving IBM x-Force reputation!')
        return "-"
def checkTalos(domain):
    try:
        url = "https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc".format(domain)
        headers = {'User-Agent':useragent,
                   'Referer':url}

        print('[*] Cisco Talos Check: {}'.format(domain))
        response = s.get(url,headers=headers,verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = str(responseJSON['error'])
        else:
            a = '{0} (Score: {1})'.format(str(responseJSON['category']['description']), str(responseJSON['web_score_name']))

        return a

    except Exception:
        print('[-] Error retrieving Talos reputation!')
        return "-"
def downloadMalwareDomains():
    url = malwaredomains
    response = s.get(url,headers=headers,verify=False)
    responseText = response.text
    if response.status_code == 200:
        return responseText
    else:
        print("Error reaching: {} Status: {}".format(url, response.status_code))
        # Return an empty string so the caller's .split() doesn't fail on None
        return ""
## MAIN

if __name__ == "__main__":
    try:
        import requests
        from bs4 import BeautifulSoup
        from texttable import Texttable

    except Exception as e:
        print("Expired Domains Reputation Check")
        print("[-] Missing dependencies: {}".format(str(e)))
        print("[*] Install required dependencies by running `pip install -r requirements.txt`")
        quit(0)
    parser = argparse.ArgumentParser(description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains')
    parser.add_argument('-q','--query', help='Optional keyword used to refine search results', required=False, default=False, type=str, dest='query')
    parser.add_argument('-c','--check', help='Perform slow reputation checks', required=False, default=False, action='store_true', dest='check')
    parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains (min. 100)', required=False, default=100, type=int, dest='maxresults')
    parser.add_argument('-s','--single', help='Performs reputation checks against a single domain name.', required=False, default=False, dest='single')
    parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, default=400, type=int, dest='maxwidth')
    parser.add_argument('-v','--version', action='version', version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()
    ## Variables
    query = args.query
    check = args.check
    maxresults = args.maxresults
    if maxresults < 100:
        maxresults = 100
    single = args.single
    maxwidth = args.maxwidth

    malwaredomains = 'http://mirror1.malwaredomains.com/files/justdomains'
    expireddomainsqueryurl = 'https://www.expireddomains.net/domain-name-search'

    timestamp = time.strftime("%Y%m%d_%H%M%S")
    useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
    headers = {'User-Agent':useragent}

    requests.packages.urllib3.disable_warnings()

    # HTTP Session container, used to manage cookies, session tokens and other session information
    s = requests.Session()

    data = []
    title = '''
 ____   ___  __  __    _    ___ _   _   _   _ _   _ _   _ _____ _____ ____  
|  _ \ / _ \|  \/  |  / \  |_ _| \ | | | | | | | | | \ | |_   _| ____|  _ \ 
| | | | | | | |\/| | / _ \  | || \| | | |_| | | | |  \| | | | |  _| | |_) | 
| |_| | |_| | |  | |/ ___ \ | || |\  | |  _  | |_| | |\  | | | | |___|  _ < 
|____/ \___/|_|  |_/_/   \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\ '''

    print(title)
    print("")
    print("Expired Domains Reputation Checker")
    print("Authors: @joevest and @andrewchiles\n")
    print("DISCLAIMER: This is for educational purposes only!")
    disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
If you plan to use this content for illegal purpose, don't. Have a nice day :)'''
    print(disclaimer)
    print("")
    # Retrieve reputation for a single chosen domain (Quick Mode)
    if single:
        domain = single
        print('[*] Fetching domain reputation for: {}'.format(domain))

        bluecoat = ''
        ibmxforce = ''
        ciscotalos = ''

        bluecoat = checkBluecoat(domain)
        print("[+] {}: {}".format(domain, bluecoat))

        ibmxforce = checkIBMxForce(domain)
        print("[+] {}: {}".format(domain, ibmxforce))

        ciscotalos = checkTalos(domain)
        print("[+] {}: {}".format(domain, ciscotalos))

        quit()
    # Calculate estimated runtime based on the sleep delay used between reputation checks
    runtime = 0
    if check:
        runtime = (maxresults * 20) / 60
    else:
        runtime = maxresults * .15 / 60

    print("Estimated Max Run Time: {} minutes\n".format(int(runtime)))
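
    # Worked example of the estimate above, assuming the default maxresults of 100:
    # with -c, each checked domain sleeps 10-19 seconds (random.randrange(10,20)), so the
    # estimate budgets 20 seconds per result: (100 * 20) / 60 = ~33 minutes worst case.
    # Without -c, only network latency applies: 100 * .15 / 60 is well under a minute.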
    # Download known malware domains
    print('[*] Downloading malware domain list from {}'.format(malwaredomains))
    maldomains = downloadMalwareDomains()
    maldomains_list = maldomains.split("\n")
    # Generic proxy support
    # TODO: add as a parameter
    proxies = {
        'http': 'http://127.0.0.1:8080',
        'https': 'http://127.0.0.1:8080',
    }
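
    # A minimal sketch of the TODO above: a hypothetical --proxy argument (not
    # currently defined in the parser) could replace the hardcoded addresses, e.g.:
    #
    #   parser.add_argument('-p','--proxy', help='Proxy URL, e.g. http://127.0.0.1:8080',
    #                       required=False, default=False, dest='proxy')
    #   if args.proxy:
    #       proxies = {'http': args.proxy, 'https': args.proxy}
    #
    # The commented-out requests below show where proxies=proxies would be passed.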
    # Create an initial session
    domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False)
    #domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)

    # Generate list of URLs to query for expired/deleted domains, queries return 25 results per page
    urls = []

    # Use the keyword string to narrow domain search if provided
    if query:
        print('[*] Fetching expired or deleted domains containing "{}"'.format(query))
        for i in range(0,maxresults,25):
            if i == 0:
                urls.append("{}/?q={}".format(expireddomainsqueryurl,query))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(query)
            else:
                urls.append("{}/?start={}&q={}".format(expireddomainsqueryurl,i,query))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),query)

    # If no keyword provided, retrieve list of recently expired domains
    else:
        print('[*] Fetching expired or deleted domains...')
        for i in range(0,maxresults,25):
            urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-net-domains/?start={}&o=changed&r=a'.format(i))
            urls.append('https://www.expireddomains.net/deleted-org-domains/?start={}&o=changed&r=a'.format(i))

    for url in urls:
        print("[*] {}".format(url))
        # Annoyingly, when querying for specific keywords, expireddomains.net requires additional
        # cookies that are set in JavaScript and not recognized by Requests, so we add them manually.
        # The _pk_id.10.dd0a cookie only needs a single '.' to be accepted; random integers are
        # introduced to roughly match a real cookie while still being unique.
        r1 = random.randint(100000,999999)

        # Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
        pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)

        jar = requests.cookies.RequestsCookieJar()
        jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
        jar.set('_pk_id.10.dd0a', pk_str, domain='expireddomains.net', path='/')

        domainrequest = s.get(url,headers=headers,verify=False,cookies=jar)
        #domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)

        domains = domainrequest.text
        # Turn the HTML into a Beautiful Soup object
        soup = BeautifulSoup(domains, 'lxml')
        table = soup.find("table")

        try:
            for row in table.findAll('tr')[1:]:
                # Alternative way to extract domain name
                # domain = row.find('td').find('a').text

                cells = row.findAll("td")
                if len(cells) >= 1:
                    output = ""
                    if query:
                        c0 = row.find('td').find('a').text  # domain
                        c1 = cells[1].find(text=True)       # bl
                        c2 = cells[2].find(text=True)       # domainpop
                        c3 = cells[3].find(text=True)       # birth
                        c4 = cells[4].find(text=True)       # Archive.org entries
                        c5 = cells[5].find(text=True)       # similarweb
                        c6 = cells[6].find(text=True)       # similarweb country code
                        c7 = cells[7].find(text=True)       # Dmoz.org
                        c8 = cells[8].find(text=True)       # status com
                        c9 = cells[9].find(text=True)       # status net
                        c10 = cells[10].find(text=True)     # status org
                        c11 = cells[11].find(text=True)     # status de
                        c12 = cells[12].find(text=True)     # tld registered
                        c13 = cells[13].find(text=True)     # related domains
                        c14 = cells[14].find(text=True)     # domain list
                        c15 = cells[15].find(text=True)     # status
                        c16 = cells[16].find(text=True)     # related links
                    else:
                        c0 = cells[0].find(text=True)       # domain
                        c1 = cells[1].find(text=True)       # bl
                        c2 = cells[2].find(text=True)       # domainpop
                        c3 = cells[3].find(text=True)       # birth
                        c4 = cells[4].find(text=True)       # Archive.org entries
                        c5 = cells[5].find(text=True)       # similarweb
                        c6 = cells[6].find(text=True)       # similarweb country code
                        c7 = cells[7].find(text=True)       # Dmoz.org
                        c8 = cells[8].find(text=True)       # status com
                        c9 = cells[9].find(text=True)       # status net
                        c10 = cells[10].find(text=True)     # status org
                        c11 = cells[11].find(text=True)     # status de
                        c12 = cells[12].find(text=True)     # tld registered
                        c13 = cells[13].find(text=True)     # changes
                        c14 = cells[14].find(text=True)     # whois
                        c15 = ""                            # not used
                        c16 = ""                            # not used
                        c17 = ""                            # not used

                        # Expired Domains results have an additional 'Availability' column that breaks parsing "deleted" domains
                        #c15 = cells[15].find(text=True)    # related links
                    available = ''
                    if c8 == "available":
                        available += ".com "
                    if c9 == "available":
                        available += ".net "
                    if c10 == "available":
                        available += ".org "
                    if c11 == "available":
                        available += ".de "

                    status = ""
                    if c15:
                        status = c15
                    # Skip additional reputation checks if this domain is already categorized as malicious
                    if c0 in maldomains_list:
                        print("[-] Skipping {} - Identified as known malware domain".format(c0))
                    else:
                        bluecoat = ''
                        ibmxforce = ''

                        if c3 == '-':
                            bluecoat = 'ignored'
                            ibmxforce = 'ignored'
                        elif check:
                            bluecoat = checkBluecoat(c0)
                            print("[+] {}: {}".format(c0, bluecoat))
                            ibmxforce = checkIBMxForce(c0)
                            print("[+] {}: {}".format(c0, ibmxforce))
                            # Sleep to avoid CAPTCHAs
                            time.sleep(random.randrange(10,20))
                        else:
                            bluecoat = "skipped"
                            ibmxforce = "skipped"

                        # Append parsed domain data to list
                        data.append([c0,c3,c4,available,status,bluecoat,ibmxforce])

        except Exception as e:
            print(e)
    # Sort domain list by birth year (index 1 of each row), newest first
    sortedData = sorted(data, key=lambda x: x[1], reverse=True)
    # Build HTML table
    html = ''
    htmlHeader = '<html><head><title>Expired Domain List</title></head>'
    htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
    htmlTableHeader = '''
        <table border="1" align="center">
            <th>Domain</th>
            <th>Birth</th>
            <th>Entries</th>
            <th>TLDs Available</th>
            <th>Status</th>
            <th>Symantec</th>
            <th>Categorization</th>
            <th>IBM-xForce</th>
            <th>Categorization</th>
            <th>WatchGuard</th>
            <th>Namecheap</th>
            <th>Archive.org</th>
        '''

    htmlTableBody = ''
    htmlTableFooter = '</table>'
    htmlFooter = '</body></html>'
    # Build HTML table contents
    for i in sortedData:
        htmlTableBody += '<tr>'
        htmlTableBody += '<td>{}</td>'.format(i[0]) # Domain
        htmlTableBody += '<td>{}</td>'.format(i[1]) # Birth
        htmlTableBody += '<td>{}</td>'.format(i[2]) # Entries
        htmlTableBody += '<td>{}</td>'.format(i[3]) # TLDs
        htmlTableBody += '<td>{}</td>'.format(i[4]) # Status
        htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/sitereview#/?search={}" target="_blank">Bluecoat</a></td>'.format(i[0]) # Bluecoat
        htmlTableBody += '<td>{}</td>'.format(i[5]) # Bluecoat categorization
        htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">IBM-xForce</a></td>'.format(i[0]) # IBM xForce
        htmlTableBody += '<td>{}</td>'.format(i[6]) # IBM x-Force categorization
        htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0]) # Borderware WatchGuard
        htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0]) # Namecheap
        htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0]) # Archive.org
        htmlTableBody += '</tr>'

    html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter
    logfilename = "{}_domainreport.html".format(timestamp)
    log = open(logfilename,'w')
    log.write(html)
    log.close()

    print("\n[*] Search complete")
    print("[*] Log written to {}\n".format(logfilename))
    # Print text table
    t = Texttable(max_width=maxwidth)
    header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'Symantec', 'IBM']
    t.header(header)
    # header=False keeps Texttable from consuming the first data row as a header row
    t.add_rows(sortedData, header=False)
    print(t.draw())