domainhunter.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. #!/usr/bin/env python
  2. ## Title: domainhunter.py
  3. ## Author: @joevest and @andrewchiles
  4. ## Description: Checks expired domains, reputation/categorization, and Archive.org history to determine
  5. ## good candidates for phishing and C2 domain names
  6. # To-do:
  7. # Code cleanup/optimization
  8. # Add Authenticated "Members-Only" option to download CSV/txt (https://member.expireddomains.net/domains/expiredcom/)
  9. import time
  10. import random
  11. import argparse
  12. import json
  13. __version__ = "20180409"
  14. ## Functions
  15. def doSleep(timing):
  16. if timing == 0:
  17. time.sleep(random.randrange(90,120))
  18. elif timing == 1:
  19. time.sleep(random.randrange(60,90))
  20. elif timing == 2:
  21. time.sleep(random.randrange(30,60))
  22. elif timing == 3:
  23. time.sleep(random.randrange(10,20))
  24. elif timing == 4:
  25. time.sleep(random.randrange(5,10))
  26. else:
  27. # Maxiumum speed - no delay
  28. pass
  29. def checkBluecoat(domain):
  30. try:
  31. url = 'https://sitereview.bluecoat.com/resource/lookup'
  32. postData = {'url':domain,'captcha':''} # HTTP POST Parameters
  33. headers = {'User-Agent':useragent,
  34. 'Content-Type':'application/json; charset=UTF-8',
  35. 'Referer':'https://sitereview.bluecoat.com/lookup'}
  36. print('[*] BlueCoat: {}'.format(domain))
  37. response = s.post(url,headers=headers,json=postData,verify=False)
  38. responseJSON = json.loads(response.text)
  39. if 'errorType' in responseJSON:
  40. a = responseJSON['errorType']
  41. else:
  42. a = responseJSON['categorization'][0]['name']
  43. # # Print notice if CAPTCHAs are blocking accurate results
  44. # if a == 'captcha':
  45. # print('[-] Error: Blue Coat CAPTCHA received. Change your IP or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')
  46. # #raw_input('[*] Press Enter to continue...')
  47. return a
  48. except:
  49. print('[-] Error retrieving Bluecoat reputation!')
  50. return "-"
  51. def checkIBMxForce(domain):
  52. try:
  53. url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
  54. headers = {'User-Agent':useragent,
  55. 'Accept':'application/json, text/plain, */*',
  56. 'x-ui':'XFE',
  57. 'Origin':url,
  58. 'Referer':url}
  59. print('[*] IBM xForce: {}'.format(domain))
  60. url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
  61. response = s.get(url,headers=headers,verify=False)
  62. responseJSON = json.loads(response.text)
  63. if 'error' in responseJSON:
  64. a = responseJSON['error']
  65. elif not responseJSON['result']['cats']:
  66. a = 'Uncategorized'
  67. else:
  68. categories = ''
  69. # Parse all dictionary keys and append to single string to get Category names
  70. for key in responseJSON["result"]['cats']:
  71. categories += '{0}, '.format(str(key))
  72. a = '{0}(Score: {1})'.format(categories,str(responseJSON['result']['score']))
  73. return a
  74. except:
  75. print('[-] Error retrieving IBM x-Force reputation!')
  76. return "-"
  77. def checkTalos(domain):
  78. url = "https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc".format(domain)
  79. headers = {'User-Agent':useragent,
  80. 'Referer':url}
  81. print('[*] Cisco Talos: {}'.format(domain))
  82. try:
  83. response = s.get(url,headers=headers,verify=False)
  84. responseJSON = json.loads(response.text)
  85. if 'error' in responseJSON:
  86. a = str(responseJSON['error'])
  87. elif responseJSON['category'] is None:
  88. a = 'Uncategorized'
  89. else:
  90. a = '{0} (Score: {1})'.format(str(responseJSON['category']['description']), str(responseJSON['web_score_name']))
  91. return a
  92. except:
  93. print('[-] Error retrieving Talos reputation!')
  94. return "-"
  95. def checkMXToolbox(domain):
  96. url = 'https://mxtoolbox.com/Public/Tools/BrandReputation.aspx'
  97. headers = {'User-Agent':useragent,
  98. 'Origin':url,
  99. 'Referer':url}
  100. print('[*] Google SafeBrowsing and PhishTank: {}'.format(domain))
  101. try:
  102. response = s.get(url=url, headers=headers)
  103. soup = BeautifulSoup(response.content,'lxml')
  104. viewstate = soup.select('input[name=__VIEWSTATE]')[0]['value']
  105. viewstategenerator = soup.select('input[name=__VIEWSTATEGENERATOR]')[0]['value']
  106. eventvalidation = soup.select('input[name=__EVENTVALIDATION]')[0]['value']
  107. data = {
  108. "__EVENTTARGET": "",
  109. "__EVENTARGUMENT": "",
  110. "__VIEWSTATE": viewstate,
  111. "__VIEWSTATEGENERATOR": viewstategenerator,
  112. "__EVENTVALIDATION": eventvalidation,
  113. "ctl00$ContentPlaceHolder1$brandReputationUrl": domain,
  114. "ctl00$ContentPlaceHolder1$brandReputationDoLookup": "Brand Reputation Lookup",
  115. "ctl00$ucSignIn$hfRegCode": 'missing',
  116. "ctl00$ucSignIn$hfRedirectSignUp": '/Public/Tools/BrandReputation.aspx',
  117. "ctl00$ucSignIn$hfRedirectLogin": '',
  118. "ctl00$ucSignIn$txtEmailAddress": '',
  119. "ctl00$ucSignIn$cbNewAccount": 'cbNewAccount',
  120. "ctl00$ucSignIn$txtFullName": '',
  121. "ctl00$ucSignIn$txtModalNewPassword": '',
  122. "ctl00$ucSignIn$txtPhone": '',
  123. "ctl00$ucSignIn$txtCompanyName": '',
  124. "ctl00$ucSignIn$drpTitle": '',
  125. "ctl00$ucSignIn$txtTitleName": '',
  126. "ctl00$ucSignIn$txtModalPassword": ''
  127. }
  128. response = s.post(url=url, headers=headers, data=data)
  129. soup = BeautifulSoup(response.content,'lxml')
  130. a = ''
  131. if soup.select('div[id=ctl00_ContentPlaceHolder1_noIssuesFound]'):
  132. a = 'No issues found'
  133. return a
  134. else:
  135. if soup.select('div[id=ctl00_ContentPlaceHolder1_googleSafeBrowsingIssuesFound]'):
  136. a = 'Google SafeBrowsing Issues Found. '
  137. if soup.select('div[id=ctl00_ContentPlaceHolder1_phishTankIssuesFound]'):
  138. a += 'PhishTank Issues Found'
  139. return a
  140. except Exception as e:
  141. print('[-] Error retrieving Google SafeBrowsing and PhishTank reputation!')
  142. return "-"
  143. def downloadMalwareDomains(malwaredomainsURL):
  144. url = malwaredomainsURL
  145. response = s.get(url,headers=headers,verify=False)
  146. responseText = response.text
  147. if response.status_code == 200:
  148. return responseText
  149. else:
  150. print("[-] Error reaching:{} Status: {}").format(url, response.status_code)
  151. def checkDomain(domain):
  152. print('[*] Fetching domain reputation for: {}'.format(domain))
  153. if domain in maldomainsList:
  154. print("[!] {}: Identified as known malware domain (malwaredomains.com)".format(domain))
  155. mxtoolbox = checkMXToolbox(domain)
  156. print("[+] {}: {}".format(domain, mxtoolbox))
  157. bluecoat = checkBluecoat(domain)
  158. print("[+] {}: {}".format(domain, bluecoat))
  159. ibmxforce = checkIBMxForce(domain)
  160. print("[+] {}: {}".format(domain, ibmxforce))
  161. ciscotalos = checkTalos(domain)
  162. print("[+] {}: {}".format(domain, ciscotalos))
  163. print("")
  164. return
  165. ## MAIN
  166. if __name__ == "__main__":
  167. try:
  168. import requests
  169. from bs4 import BeautifulSoup
  170. from texttable import Texttable
  171. except Exception as e:
  172. print("Expired Domains Reputation Check")
  173. print("[-] Missing dependencies: {}".format(str(e)))
  174. print("[*] Install required dependencies by running `pip install -r requirements.txt`")
  175. quit(0)
  176. parser = argparse.ArgumentParser(description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains')
  177. parser.add_argument('-q','--query', help='Keyword used to refine search results', required=False, default=False, type=str, dest='query')
  178. parser.add_argument('-c','--check', help='Perform domain reputation checks', required=False, default=False, action='store_true', dest='check')
  179. parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains', required=False, default=100, type=int, dest='maxresults')
  180. parser.add_argument('-s','--single', help='Performs detailed reputation checks against a single domain name/IP.', required=False, default=False, dest='single')
  181. parser.add_argument('-t','--timing', help='Modifies request timing to avoid CAPTCHAs. Slowest(0) = 90-120 seconds, Default(3) = 10-20 seconds, Fastest(5) = no delay', required=False, default=3, type=int, choices=range(0,6), dest='timing')
  182. parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, default=400, type=int, dest='maxwidth')
  183. parser.add_argument('-v','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
  184. args = parser.parse_args()
  185. ## Variables
  186. query = args.query
  187. check = args.check
  188. maxresults = args.maxresults
  189. single = args.single
  190. timing = args.timing
  191. maxwidth = args.maxwidth
  192. malwaredomainsURL = 'http://mirror1.malwaredomains.com/files/justdomains'
  193. expireddomainsqueryURL = 'https://www.expireddomains.net/domain-name-search'
  194. timestamp = time.strftime("%Y%m%d_%H%M%S")
  195. useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
  196. headers = {'User-Agent':useragent}
  197. requests.packages.urllib3.disable_warnings()
  198. # HTTP Session container, used to manage cookies, session tokens and other session information
  199. s = requests.Session()
  200. data = []
  201. title = '''
  202. ____ ___ __ __ _ ___ _ _ _ _ _ _ _ _ _____ _____ ____
  203. | _ \ / _ \| \/ | / \ |_ _| \ | | | | | | | | | \ | |_ _| ____| _ \
  204. | | | | | | | |\/| | / _ \ | || \| | | |_| | | | | \| | | | | _| | |_) |
  205. | |_| | |_| | | | |/ ___ \ | || |\ | | _ | |_| | |\ | | | | |___| _ <
  206. |____/ \___/|_| |_/_/ \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\ '''
  207. print(title)
  208. print("")
  209. print("Expired Domains Reputation Checker")
  210. print("Authors: @joevest and @andrewchiles\n")
  211. print("DISCLAIMER: This is for educational purposes only!")
  212. disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
  213. The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
  214. If you plan to use this content for illegal purpose, don't. Have a nice day :)'''
  215. print(disclaimer)
  216. print("")
  217. # Download known malware domains
  218. print('[*] Downloading malware domain list from {}\n'.format(malwaredomainsURL))
  219. maldomains = downloadMalwareDomains(malwaredomainsURL)
  220. maldomainsList = maldomains.split("\n")
  221. # Retrieve reputation for a single choosen domain (Quick Mode)
  222. if single:
  223. checkDomain(single)
  224. quit(0)
  225. # Calculate estimated runtime based on timing variable if checking domain categorization for all returned domains
  226. if check:
  227. if timing == 0:
  228. seconds = 90
  229. elif timing == 1:
  230. seconds = 60
  231. elif timing == 2:
  232. seconds = 30
  233. elif timing == 3:
  234. seconds = 20
  235. elif timing == 4:
  236. seconds = 10
  237. else:
  238. seconds = 0
  239. runtime = (maxresults * seconds) / 60
  240. print("[*] Peforming Domain Categorization Lookups:")
  241. print("[*] Estimated duration is {} minutes. Modify lookup speed with -t switch.\n".format(int(runtime)))
  242. else:
  243. pass
  244. # Generic Proxy support
  245. # TODO: add as a parameter
  246. proxies = {
  247. 'http': 'http://127.0.0.1:8080',
  248. 'https': 'http://127.0.0.1:8080',
  249. }
  250. # Create an initial session
  251. domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False)
  252. # Use proxy like Burp for debugging request/parsing errors
  253. #domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)
  254. # Generate list of URLs to query for expired/deleted domains
  255. urls = []
  256. # Use the keyword string to narrow domain search if provided
  257. if query:
  258. print('[*] Fetching expired or deleted domains containing "{}"'.format(query))
  259. for i in range (0,maxresults,25):
  260. if i == 0:
  261. urls.append("{}/?q={}".format(expireddomainsqueryURL,query))
  262. headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(query)
  263. else:
  264. urls.append("{}/?start={}&q={}".format(expireddomainsqueryURL,i,query))
  265. headers['Referer'] ='https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),query)
  266. # If no keyword provided, retrieve list of recently expired domains in batches of 25 results.
  267. else:
  268. print('[*] Fetching expired or deleted domains...')
  269. # Caculate number of URLs to request since we're performing a request for four different resources instead of one
  270. numresults = int(maxresults / 4)
  271. for i in range (0,(numresults),25):
  272. urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&o=changed&r=a'.format(i))
  273. urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&o=changed&r=a'.format(i))
  274. urls.append('https://www.expireddomains.net/deleted-net-domains/?start={}&o=changed&r=a'.format(i))
  275. urls.append('https://www.expireddomains.net/deleted-org-domains/?start={}&o=changed&r=a'.format(i))
  276. for url in urls:
  277. print("[*] {}".format(url))
  278. # Annoyingly when querying specific keywords the expireddomains.net site requires additional cookies which
  279. # are set in JavaScript and not recognized by Requests so we add them here manually.
  280. # May not be needed, but the _pk_id.10.dd0a cookie only requires a single . to be successful
  281. # In order to somewhat match a real cookie, but still be different, random integers are introduced
  282. r1 = random.randint(100000,999999)
  283. # Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
  284. pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)
  285. jar = requests.cookies.RequestsCookieJar()
  286. jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
  287. jar.set('_pk_id.10.dd0a', pk_str, domain='expireddomains.net', path='/')
  288. domainrequest = s.get(url,headers=headers,verify=False,cookies=jar)
  289. #domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)
  290. domains = domainrequest.text
  291. # Turn the HTML into a Beautiful Soup object
  292. soup = BeautifulSoup(domains, 'lxml')
  293. table = soup.find("table")
  294. try:
  295. for row in table.findAll('tr')[1:]:
  296. # Alternative way to extract domain name
  297. # domain = row.find('td').find('a').text
  298. cells = row.findAll("td")
  299. if len(cells) >= 1:
  300. output = ""
  301. if query:
  302. c0 = row.find('td').find('a').text # domain
  303. c1 = cells[1].find(text=True) # bl
  304. c2 = cells[2].find(text=True) # domainpop
  305. c3 = cells[3].find(text=True) # birth
  306. c4 = cells[4].find(text=True) # Archive.org entries
  307. c5 = cells[5].find(text=True) # similarweb
  308. c6 = cells[6].find(text=True) # similarweb country code
  309. c7 = cells[7].find(text=True) # Dmoz.org
  310. c8 = cells[8].find(text=True) # status com
  311. c9 = cells[9].find(text=True) # status net
  312. c10 = cells[10].find(text=True) # status org
  313. c11 = cells[11].find(text=True) # status de
  314. c12 = cells[12].find(text=True) # tld registered
  315. c13 = cells[13].find(text=True) # Related Domains
  316. c14 = cells[14].find(text=True) # Domain list
  317. c15 = cells[15].find(text=True) # status
  318. c16 = cells[16].find(text=True) # related links
  319. else:
  320. c0 = cells[0].find(text=True) # domain
  321. c1 = cells[1].find(text=True) # bl
  322. c2 = cells[2].find(text=True) # domainpop
  323. c3 = cells[3].find(text=True) # birth
  324. c4 = cells[4].find(text=True) # Archive.org entries
  325. c5 = cells[5].find(text=True) # similarweb
  326. c6 = cells[6].find(text=True) # similarweb country code
  327. c7 = cells[7].find(text=True) # Dmoz.org
  328. c8 = cells[8].find(text=True) # status com
  329. c9 = cells[9].find(text=True) # status net
  330. c10 = cells[10].find(text=True) # status org
  331. c11 = cells[11].find(text=True) # status de
  332. c12 = cells[12].find(text=True) # tld registered
  333. c13 = cells[13].find(text=True) # changes
  334. c14 = cells[14].find(text=True) # whois
  335. c15 = "" # not used
  336. c16 = "" # not used
  337. c17 = "" # not used
  338. # Expired Domains results have an additional 'Availability' column that breaks parsing "deleted" domains
  339. #c15 = cells[15].find(text=True) # related links
  340. available = ''
  341. if c8 == "available":
  342. available += ".com "
  343. if c9 == "available":
  344. available += ".net "
  345. if c10 == "available":
  346. available += ".org "
  347. if c11 == "available":
  348. available += ".de "
  349. status = ""
  350. if c15:
  351. status = c15
  352. # Skip additional reputation checks if this domain is already categorized as malicious
  353. if c0 in maldomainsList:
  354. print("[-] Skipping {} - Identified as known malware domain").format(c0)
  355. else:
  356. bluecoat = ''
  357. ibmxforce = ''
  358. if c3 == '-':
  359. bluecoat = 'ignored'
  360. ibmxforce = 'ignored'
  361. elif check == True:
  362. bluecoat = checkBluecoat(c0)
  363. print("[+] {}: {}".format(c0, bluecoat))
  364. ibmxforce = checkIBMxForce(c0)
  365. print("[+] {}: {}".format(c0, ibmxforce))
  366. # Sleep to avoid captchas
  367. doSleep(timing)
  368. else:
  369. bluecoat = "skipped"
  370. ibmxforce = "skipped"
  371. # Append parsed domain data to list
  372. data.append([c0,c3,c4,available,status,bluecoat,ibmxforce])
  373. except Exception as e:
  374. print(e)
  375. # Sort domain list by column 2 (Birth Year)
  376. sortedData = sorted(data, key=lambda x: x[1], reverse=True)
  377. # Build HTML Table
  378. html = ''
  379. htmlHeader = '<html><head><title>Expired Domain List</title></head>'
  380. htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
  381. htmlTableHeader = '''
  382. <table border="1" align="center">
  383. <th>Domain</th>
  384. <th>Birth</th>
  385. <th>Entries</th>
  386. <th>TLDs Available</th>
  387. <th>Status</th>
  388. <th>Symantec</th>
  389. <th>Categorization</th>
  390. <th>IBM-xForce</th>
  391. <th>Categorization</th>
  392. <th>WatchGuard</th>
  393. <th>Namecheap</th>
  394. <th>Archive.org</th>
  395. '''
  396. htmlTableBody = ''
  397. htmlTableFooter = '</table>'
  398. htmlFooter = '</body></html>'
  399. # Build HTML table contents
  400. for i in sortedData:
  401. htmlTableBody += '<tr>'
  402. htmlTableBody += '<td>{}</td>'.format(i[0]) # Domain
  403. htmlTableBody += '<td>{}</td>'.format(i[1]) # Birth
  404. htmlTableBody += '<td>{}</td>'.format(i[2]) # Entries
  405. htmlTableBody += '<td>{}</td>'.format(i[3]) # TLDs
  406. htmlTableBody += '<td>{}</td>'.format(i[4]) # Status
  407. htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/sitereview#/?search={}" target="_blank">Bluecoat</a></td>'.format(i[0]) # Bluecoat
  408. htmlTableBody += '<td>{}</td>'.format(i[5]) # Bluecoat Categorization
  409. htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">IBM-xForce</a></td>'.format(i[0]) # IBM xForce
  410. htmlTableBody += '<td>{}</td>'.format(i[6]) # IBM x-Force Categorization
  411. htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0]) # Borderware WatchGuard
  412. htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0]) # Namecheap
  413. htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0]) # Archive.org
  414. htmlTableBody += '</tr>'
  415. html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter
  416. logfilename = "{}_domainreport.html".format(timestamp)
  417. log = open(logfilename,'w')
  418. log.write(html)
  419. log.close
  420. print("\n[*] Search complete")
  421. print("[*] Log written to {}\n".format(logfilename))
  422. # Print Text Table
  423. t = Texttable(max_width=maxwidth)
  424. t.add_rows(sortedData)
  425. header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'Symantec', 'IBM']
  426. t.header(header)
  427. print(t.draw())