
#!/usr/bin/env python

## Title:       domainhunter.py
## Author:      @joevest and @andrewchiles
## Description: Checks expired domains, reputation/categorization, and Archive.org history to determine
##              good candidates for phishing and C2 domain names

# If the expected response format from a provider changes, use the traceback module
# to get a full stack trace without removing try/except blocks
#import traceback
#traceback.print_exc()

import time
import random
import argparse
import json
import base64
import os

__version__ = "20180917"

## Functions

def doSleep(timing):
    if timing == 0:
        time.sleep(random.randrange(90,120))
    elif timing == 1:
        time.sleep(random.randrange(60,90))
    elif timing == 2:
        time.sleep(random.randrange(30,60))
    elif timing == 3:
        time.sleep(random.randrange(10,20))
    elif timing == 4:
        time.sleep(random.randrange(5,10))
    # There's no elif timing == 5 here because we don't want to sleep for -t 5
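
# Equivalent lookup-table sketch (illustrative only, not used by the script):
#   delays = {0: (90,120), 1: (60,90), 2: (30,60), 3: (10,20), 4: (5,10)}
#   if timing in delays:
#       time.sleep(random.randrange(*delays[timing]))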

def checkBluecoat(domain):
    try:
        url = 'https://sitereview.bluecoat.com/resource/lookup'
        postData = {'url':domain,'captcha':''}
        headers = {'User-Agent':useragent,
                   'Accept':'application/json, text/plain, */*',
                   'Content-Type':'application/json; charset=UTF-8',
                   'Referer':'https://sitereview.bluecoat.com/lookup'}

        print('[*] BlueCoat: {}'.format(domain))
        response = s.post(url,headers=headers,json=postData,verify=False)
        responseJSON = json.loads(response.text)

        if 'errorType' in responseJSON:
            a = responseJSON['errorType']
        else:
            a = responseJSON['categorization'][0]['name']

        # Print a notice if CAPTCHAs are blocking accurate results and attempt to solve if --ocr
        if a == 'captcha':
            if ocr:
                # This request is also performed by a browser, but is not needed for our purposes
                #captcharequestURL = 'https://sitereview.bluecoat.com/resource/captcha-request'
                print('[*] Received CAPTCHA challenge!')
                captcha = solveCaptcha('https://sitereview.bluecoat.com/resource/captcha.jpg',s)

                if captcha:
                    b64captcha = base64.urlsafe_b64encode(captcha.encode('utf-8')).decode('utf-8')

                    # Send the CAPTCHA solution via GET since inclusion with the domain categorization request doesn't work anymore
                    captchasolutionURL = 'https://sitereview.bluecoat.com/resource/captcha-request/{0}'.format(b64captcha)
                    print('[*] Submitting CAPTCHA at {0}'.format(captchasolutionURL))
                    response = s.get(url=captchasolutionURL,headers=headers,verify=False)

                    # Try the categorization request again
                    response = s.post(url,headers=headers,json=postData,verify=False)
                    responseJSON = json.loads(response.text)

                    if 'errorType' in responseJSON:
                        a = responseJSON['errorType']
                    else:
                        a = responseJSON['categorization'][0]['name']
                else:
                    print('[-] Error: Failed to solve BlueCoat CAPTCHA with OCR! Manually solve at "https://sitereview.bluecoat.com/sitereview.jsp"')
            else:
                print('[-] Error: BlueCoat CAPTCHA received. Try the --ocr flag or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')

        return a

    except Exception as e:
        print('[-] Error retrieving Bluecoat reputation! {0}'.format(e))
        return "-"
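
# Return contract (as implemented above): a category name such as 'Uncategorized',
# a BlueCoat errorType string (e.g. 'captcha'), or '-' when the request itself
# fails. Relies on the module-level session `s`, `useragent`, and `ocr` defined
# under __main__ below.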

def checkIBMXForce(domain):
    try:
        url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
        headers = {'User-Agent':useragent,
                   'Accept':'application/json, text/plain, */*',
                   'x-ui':'XFE',
                   'Origin':url,
                   'Referer':url}

        print('[*] IBM xForce: {}'.format(domain))

        url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
        response = s.get(url,headers=headers,verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = responseJSON['error']
        elif not responseJSON['result']['cats']:
            a = 'Uncategorized'
            ## TO-DO - Add a notice when the "intrusion" category is returned, since it indicates
            ## that the endpoint's rate limit / brute-force protection was hit
        else:
            categories = ''
            # Parse all dictionary keys and append them to a single string to get the category names
            for key in responseJSON["result"]['cats']:
                categories += '{0}, '.format(str(key))

            a = '{0}(Score: {1})'.format(categories,str(responseJSON['result']['score']))

        return a

    except Exception as e:
        print('[-] Error retrieving IBM X-Force reputation! {0}'.format(e))
        return "-"
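
# On success this returns the comma-joined X-Force category keys with the score
# appended, shaped like 'SomeCategory, (Score: 1)' (illustrative value); the
# trailing comma-space comes from the string-building loop above.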

def checkTalos(domain):
    url = 'https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc'.format(domain)
    headers = {'User-Agent':useragent,
               'Referer':url}

    print('[*] Cisco Talos: {}'.format(domain))
    try:
        response = s.get(url,headers=headers,verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = str(responseJSON['error'])
            if a == "Unfortunately, we can't find any results for your search.":
                a = 'Uncategorized'
        elif responseJSON['category'] is None:
            a = 'Uncategorized'
        else:
            a = '{0} (Score: {1})'.format(str(responseJSON['category']['description']), str(responseJSON['web_score_name']))

        return a

    except Exception as e:
        print('[-] Error retrieving Talos reputation! {0}'.format(e))
        return "-"
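
# Both the explicit "no results" error string and a null 'category' field are
# normalized to 'Uncategorized' above so the downstream table filtering can
# treat them identically.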

def checkMXToolbox(domain):
    url = 'https://mxtoolbox.com/Public/Tools/BrandReputation.aspx'
    headers = {'User-Agent':useragent,
               'Origin':url,
               'Referer':url}

    print('[*] Google SafeBrowsing and PhishTank: {}'.format(domain))
    try:
        response = s.get(url=url, headers=headers)
        soup = BeautifulSoup(response.content,'lxml')

        viewstate = soup.select('input[name=__VIEWSTATE]')[0]['value']
        viewstategenerator = soup.select('input[name=__VIEWSTATEGENERATOR]')[0]['value']
        eventvalidation = soup.select('input[name=__EVENTVALIDATION]')[0]['value']

        data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": viewstate,
            "__VIEWSTATEGENERATOR": viewstategenerator,
            "__EVENTVALIDATION": eventvalidation,
            "ctl00$ContentPlaceHolder1$brandReputationUrl": domain,
            "ctl00$ContentPlaceHolder1$brandReputationDoLookup": "Brand Reputation Lookup",
            "ctl00$ucSignIn$hfRegCode": 'missing',
            "ctl00$ucSignIn$hfRedirectSignUp": '/Public/Tools/BrandReputation.aspx',
            "ctl00$ucSignIn$hfRedirectLogin": '',
            "ctl00$ucSignIn$txtEmailAddress": '',
            "ctl00$ucSignIn$cbNewAccount": 'cbNewAccount',
            "ctl00$ucSignIn$txtFullName": '',
            "ctl00$ucSignIn$txtModalNewPassword": '',
            "ctl00$ucSignIn$txtPhone": '',
            "ctl00$ucSignIn$txtCompanyName": '',
            "ctl00$ucSignIn$drpTitle": '',
            "ctl00$ucSignIn$txtTitleName": '',
            "ctl00$ucSignIn$txtModalPassword": ''
        }

        response = s.post(url=url, headers=headers, data=data)
        soup = BeautifulSoup(response.content,'lxml')

        a = ''
        if soup.select('div[id=ctl00_ContentPlaceHolder1_noIssuesFound]'):
            a = 'No issues found'
            return a
        else:
            if soup.select('div[id=ctl00_ContentPlaceHolder1_googleSafeBrowsingIssuesFound]'):
                a = 'Google SafeBrowsing Issues Found. '
            if soup.select('div[id=ctl00_ContentPlaceHolder1_phishTankIssuesFound]'):
                a += 'PhishTank Issues Found'
            return a

    except Exception as e:
        print('[-] Error retrieving Google SafeBrowsing and PhishTank reputation! {0}'.format(e))
        return "-"
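
# The lookup page is ASP.NET WebForms, so the POST above has to echo back the
# __VIEWSTATE, __VIEWSTATEGENERATOR, and __EVENTVALIDATION hidden fields scraped
# from the initial GET; WebForms endpoints generally reject a postback without them.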

def downloadMalwareDomains(malwaredomainsURL):
    url = malwaredomainsURL
    response = s.get(url=url,headers=headers,verify=False)
    responseText = response.text
    if response.status_code == 200:
        return responseText
    else:
        print("[-] Error reaching: {} Status: {}".format(url, response.status_code))

def checkDomain(domain):
    print('[*] Fetching domain reputation for: {}'.format(domain))

    if domain in maldomainsList:
        print("[!] {}: Identified as known malware domain (malwaredomains.com)".format(domain))

    bluecoat = checkBluecoat(domain)
    print("[+] {}: {}".format(domain, bluecoat))

    ibmxforce = checkIBMXForce(domain)
    print("[+] {}: {}".format(domain, ibmxforce))

    ciscotalos = checkTalos(domain)
    print("[+] {}: {}".format(domain, ciscotalos))

    mxtoolbox = checkMXToolbox(domain)
    print("[+] {}: {}".format(domain, mxtoolbox))

    print("")

    results = [domain,bluecoat,ibmxforce,ciscotalos,mxtoolbox]
    return results
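
# The result order matches the table header used for --single/--filename output
# below: ['Domain', 'BlueCoat', 'IBM X-Force', 'Cisco Talos', 'MXToolbox'].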

def solveCaptcha(url,session):
    # Downloads the CAPTCHA image and saves it to the current directory for OCR with tesseract
    # Returns the CAPTCHA string or False if an error occurred
    jpeg = 'captcha.jpg'

    try:
        response = session.get(url=url,headers=headers,verify=False, stream=True)
        if response.status_code == 200:
            with open(jpeg, 'wb') as f:
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, f)
        else:
            print('[-] Error downloading CAPTCHA file!')
            return False

        # Perform basic OCR without additional image enhancement
        text = pytesseract.image_to_string(Image.open(jpeg))
        text = text.replace(" ", "")

        # Remove the CAPTCHA file
        try:
            os.remove(jpeg)
        except OSError:
            pass

        return text

    except Exception as e:
        print("[-] Error solving CAPTCHA - {0}".format(e))
        return False
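
# pytesseract shells out to the tesseract binary, so it must be installed and on
# PATH (see the OCR dependency messages under __main__). OCR accuracy on this
# CAPTCHA varies, hence the manual-solve fallbacks in checkBluecoat.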

def drawTable(header,data):
    data.insert(0,header)
    t = Texttable(max_width=maxwidth)
    t.add_rows(data)
    t.header(header)

    return(t.draw())
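
# Usage sketch with illustrative values (relies on the module-level maxwidth set
# in __main__ below):
#   print(drawTable(['Domain','BlueCoat'], [['example.com','Uncategorized']]))
# Note that data.insert(0,header) mutates the caller's list in place.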

## MAIN
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
        epilog = '''Examples:
./domainhunter.py -k apples -c --ocr -t5
./domainhunter.py --check --ocr -t3
./domainhunter.py --single mydomain.com
./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
./domainhunter.py --filename inputlist.txt --ocr --timing 5''',
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-a','--alexa', help='Filter results to Alexa listings', required=False, default=0, action='store_const', const=1)
    parser.add_argument('-k','--keyword', help='Keyword used to refine search results', required=False, default=False, type=str, dest='keyword')
    parser.add_argument('-c','--check', help='Perform domain reputation checks', required=False, default=False, action='store_true', dest='check')
    parser.add_argument('-f','--filename', help='Specify input file of line-delimited domain names to check', required=False, default=False, type=str, dest='filename')
    parser.add_argument('--ocr', help='Perform OCR on CAPTCHAs when challenged', required=False, default=False, action='store_true')
    parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains', required=False, default=100, type=int, dest='maxresults')
    parser.add_argument('-s','--single', help='Performs detailed reputation checks against a single domain name/IP.', required=False, default=False, dest='single')
    parser.add_argument('-t','--timing', help='Modifies request timing to avoid CAPTCHAs. Slowest(0) = 90-120 seconds, Default(3) = 10-20 seconds, Fastest(5) = no delay', required=False, default=3, type=int, choices=range(0,6), dest='timing')
    parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, default=400, type=int, dest='maxwidth')
    parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    # Load dependent modules
    try:
        import requests
        from bs4 import BeautifulSoup
        from texttable import Texttable

    except Exception as e:
        print("Expired Domains Reputation Check")
        print("[-] Missing basic dependencies: {}".format(str(e)))
        print("[*] Install required dependencies by running `pip3 install -r requirements.txt`")
        quit(0)

    # Load OCR-related modules only if the --ocr flag is set, since these can be difficult to get working
    if args.ocr:
        try:
            import pytesseract
            from PIL import Image
            import shutil
        except Exception as e:
            print("Expired Domains Reputation Check")
            print("[-] Missing OCR dependencies: {}".format(str(e)))
            print("[*] Install required Python dependencies by running: pip3 install -r requirements.txt")
            print("[*] Ubuntu/Debian - Install tesseract by running: apt-get install tesseract-ocr python3-imaging")
            print("[*] macOS - Install tesseract with homebrew by running: brew install tesseract")
            quit(0)

    ## Variables
    alexa = args.alexa
    keyword = args.keyword
    check = args.check
    filename = args.filename
    maxresults = args.maxresults
    single = args.single
    timing = args.timing
    maxwidth = args.maxwidth
    ocr = args.ocr

    malwaredomainsURL = 'http://mirror1.malwaredomains.com/files/justdomains'
    expireddomainsqueryURL = 'https://www.expireddomains.net/domain-name-search'

    timestamp = time.strftime("%Y%m%d_%H%M%S")

    useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
    headers = {'User-Agent':useragent}

    requests.packages.urllib3.disable_warnings()

    # HTTP Session container, used to manage cookies, session tokens and other session information
    s = requests.Session()

    title = '''
 ____   ___  __  __    _    ___ _   _   _   _ _   _ _   _ _____ _____ ____
|  _ \ / _ \|  \/  |  / \  |_ _| \ | | | | | | | | | \ | |_   _| ____|  _ \\
| | | | | | | |\/| | / _ \  | ||  \| | | |_| | | | |  \| | | | |  _| | |_) |
| |_| | |_| | |  | |/ ___ \ | || |\  | |  _  | |_| | |\  | | | | |___|  _ <
|____/ \___/|_|  |_/_/   \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\ '''

    print(title)
    print("")
    print("Expired Domains Reputation Checker")
    print("Authors: @joevest and @andrewchiles\n")
    print("DISCLAIMER: This is for educational purposes only!")
    disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
The authors and their employers are not liable for any illegal act or misuse performed by any user of this tool.
If you plan to use this content for illegal purposes, don't. Have a nice day :)'''
    print(disclaimer)
    print("")

    # Download known malware domains
    print('[*] Downloading malware domain list from {}\n'.format(malwaredomainsURL))
    maldomains = downloadMalwareDomains(malwaredomainsURL)
    maldomainsList = maldomains.split("\n")

    # Retrieve reputation for a single chosen domain (Quick Mode)
    if single:
        checkDomain(single)
        exit(0)

    # Perform detailed domain reputation checks against an input file, print the table, and quit
    if filename:
        # Initialize the results list; drawTable adds the header row later
        data = []
        try:
            with open(filename, 'r') as domainsList:
                for line in domainsList.read().splitlines():
                    data.append(checkDomain(line))
                    doSleep(timing)

                # Print results table
                header = ['Domain', 'BlueCoat', 'IBM X-Force', 'Cisco Talos', 'MXToolbox']
                print(drawTable(header,data))

        except KeyboardInterrupt:
            print('Caught keyboard interrupt. Exiting!')
            exit(0)
        except Exception as e:
            print('[-] Error: {}'.format(e))
            exit(1)
        exit(0)

    # Generic proxy support
    # TODO: add as a parameter
    proxies = {
        'http': 'http://127.0.0.1:8080',
        'https': 'http://127.0.0.1:8080',
    }

    # Create an initial session
    domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False)

    # Use a proxy like Burp for debugging request/parsing errors
    #domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)

    # Lists for our ExpiredDomains results
    domain_list = []
    data = []

    # Generate the list of URLs to query for expired/deleted domains
    urls = []

    # Use the keyword string to narrow the domain search if provided. This generates a list of URLs to query
    if keyword:
        print('[*] Fetching expired or deleted domains containing "{}"'.format(keyword))
        for i in range(0,maxresults,25):
            if i == 0:
                urls.append("{}/?q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL,keyword,alexa))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(keyword)
            else:
                urls.append("{}/?start={}&q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL,i,keyword,alexa))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),keyword)

    # If no keyword is provided, generate a list of recently expired domain URLs (batches of 25 results)
    else:
        print('[*] Fetching expired or deleted domains...')

        # Calculate the number of URLs to request, since we're performing requests for two different resources instead of one
        numresults = int(maxresults / 2)
        for i in range(0,numresults,25):
            urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa={}'.format(i,alexa))
            urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa={}'.format(i,alexa))

    for url in urls:
        print("[*] {}".format(url))

        # Annoyingly, when querying specific keywords the expireddomains.net site requires additional cookies
        # which are set in JavaScript and not recognized by Requests, so we add them here manually.
        # May not be needed, but the _pk_id.10.dd0a cookie only requires a single . to be successful.
        # To somewhat match a real cookie, but still be different, random integers are introduced.

        r1 = random.randint(100000,999999)

        # Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
        pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)

        jar = requests.cookies.RequestsCookieJar()
        jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
        jar.set('_pk_id.10.dd0a', pk_str, domain='expireddomains.net', path='/')

        domainrequest = s.get(url,headers=headers,verify=False,cookies=jar)
        #domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)

        domains = domainrequest.text

        # Turn the HTML into a Beautiful Soup object
        soup = BeautifulSoup(domains, 'lxml')
        #print(soup)

        try:
            table = soup.find("table")

            for row in table.findAll('tr')[1:]:
                # Alternative way to extract the domain name:
                # domain = row.find('td').find('a').text
                cells = row.findAll("td")

                if len(cells) >= 1:
                    if keyword:
                        c0 = row.find('td').find('a').text   # domain
                        c1 = cells[1].find(text=True)   # bl
                        c2 = cells[2].find(text=True)   # domainpop
                        c3 = cells[3].find(text=True)   # birth
                        c4 = cells[4].find(text=True)   # Archive.org entries
                        c5 = cells[5].find(text=True)   # similarweb
                        c6 = cells[6].find(text=True)   # similarweb country code
                        c7 = cells[7].find(text=True)   # Dmoz.org
                        c8 = cells[8].find(text=True)   # status com
                        c9 = cells[9].find(text=True)   # status net
                        c10 = cells[10].find(text=True) # status org
                        c11 = cells[11].find(text=True) # status de
                        c12 = cells[12].find(text=True) # tld registered
                        c13 = cells[13].find(text=True) # Source List
                        c14 = cells[14].find(text=True) # Domain Status
                        c15 = ""                        # Related Domains

                    # The non-keyword search table format is slightly different
                    else:
                        c0 = cells[0].find(text=True)   # domain
                        c1 = cells[1].find(text=True)   # bl
                        c2 = cells[2].find(text=True)   # domainpop
                        c3 = cells[3].find(text=True)   # birth
                        c4 = cells[4].find(text=True)   # Archive.org entries
                        c5 = cells[5].find(text=True)   # similarweb
                        c6 = cells[6].find(text=True)   # similarweb country code
                        c7 = cells[7].find(text=True)   # Dmoz.org
                        c8 = cells[8].find(text=True)   # status com
                        c9 = cells[9].find(text=True)   # status net
                        c10 = cells[10].find(text=True) # status org
                        c11 = cells[11].find(text=True) # status de
                        c12 = cells[12].find(text=True) # tld registered
                        c13 = cells[13].find(text=True) # changes
                        c14 = cells[14].find(text=True) # whois

                    available = ''
                    if c8 == "available":
                        available += ".com "
                    if c9 == "available":
                        available += ".net "
                    if c10 == "available":
                        available += ".org "
                    if c11 == "available":
                        available += ".de "

                    # Only grab the status for keyword searches since it doesn't exist otherwise
                    status = ""
                    if keyword:
                        status = c14

                    # Append parsed domain data to the list if it matches our criteria (.com|.net|.org and not a known malware domain)
                    if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
                        domain_list.append([c0,c3,c4,available,status])

        except Exception as e:
            #print(e)
            pass

        # Add an additional sleep on requests to ExpiredDomains.net to avoid errors
        time.sleep(5)

    # Check for valid list results before continuing
    if len(domain_list) == 0:
        print("[-] No domain results found or none are currently available for purchase!")
        exit(0)
    else:
        if check:
            print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))

        for domain_entry in domain_list:
            domain = domain_entry[0]
            birthdate = domain_entry[1]
            archiveentries = domain_entry[2]
            availabletlds = domain_entry[3]
            status = domain_entry[4]
            bluecoat = ''
            ibmxforce = ''
            ciscotalos = ''

            # Perform domain reputation checks
            if check:
                bluecoat = checkBluecoat(domain)
                print("[+] {}: {}".format(domain, bluecoat))

                ibmxforce = checkIBMXForce(domain)
                print("[+] {}: {}".format(domain, ibmxforce))

                ciscotalos = checkTalos(domain)
                print("[+] {}: {}".format(domain, ciscotalos))

                # Sleep to avoid CAPTCHAs
                doSleep(timing)

            # Mark reputation checks as skipped if the -c flag is not present
            else:
                bluecoat = '-'
                ibmxforce = '-'
                ciscotalos = '-'

            # Append the entry to the results list unless every service reports an undesirable or empty categorization
            if not ((bluecoat in ('Uncategorized','badurl','Suspicious','Malicious Sources/Malnets','captcha','Phishing')) and ibmxforce == "Not found." and ciscotalos == "Uncategorized"):
                data.append([domain,birthdate,archiveentries,availabletlds,status,bluecoat,ibmxforce,ciscotalos])

        # Sort the domain list by column 2 (Birth Year)
        sortedDomains = sorted(data, key=lambda x: x[1], reverse=True)

        if len(sortedDomains) == 0:
            print("\n[-] No domains discovered with a desirable categorization!")
            exit(0)
        else:
            print("\n[*] {} of {} domains discovered with a potentially desirable categorization!".format(len(sortedDomains),len(domain_list)))

        # Build the HTML table
        html = ''
        htmlHeader = '<html><head><title>Expired Domain List</title></head>'
        htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
        htmlTableHeader = '''
            <table border="1" align="center">
                <th>Domain</th>
                <th>Birth</th>
                <th>Entries</th>
                <th>TLDs Available</th>
                <th>Status</th>
                <th>BlueCoat</th>
                <th>IBM X-Force</th>
                <th>Cisco Talos</th>
                <th>WatchGuard</th>
                <th>Namecheap</th>
                <th>Archive.org</th>
            '''

        htmlTableBody = ''
        htmlTableFooter = '</table>'
        htmlFooter = '</body></html>'

        # Build the HTML table contents
        for i in sortedDomains:
            htmlTableBody += '<tr>'
            htmlTableBody += '<td>{}</td>'.format(i[0]) # Domain
            htmlTableBody += '<td>{}</td>'.format(i[1]) # Birth
            htmlTableBody += '<td>{}</td>'.format(i[2]) # Entries
            htmlTableBody += '<td>{}</td>'.format(i[3]) # TLDs
            htmlTableBody += '<td>{}</td>'.format(i[4]) # Status

            htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/" target="_blank">{}</a></td>'.format(i[5]) # BlueCoat
            htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">{}</a></td>'.format(i[0],i[6]) # IBM X-Force categorization
            htmlTableBody += '<td><a href="https://www.talosintelligence.com/reputation_center/lookup?search={}" target="_blank">{}</a></td>'.format(i[0],i[7]) # Cisco Talos
            htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0]) # Borderware WatchGuard
            htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0]) # Namecheap
            htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0]) # Archive.org
            htmlTableBody += '</tr>'

        html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter

        logfilename = "{}_domainreport.html".format(timestamp)
        log = open(logfilename,'w')
        log.write(html)
        log.close()

        print("\n[*] Search complete")
        print("[*] Log written to {}\n".format(logfilename))

        # Print the text table
        header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BlueCoat', 'IBM', 'Cisco Talos']
        print(drawTable(header,sortedDomains))