#!/usr/bin/env python

## Title:       domainhunter.py
## Author:      @joevest and @andrewchiles
## Description: Checks expired domains, reputation/categorization, and Archive.org history to determine
##              good candidates for phishing and C2 domain names

# If the expected response format from a provider changes, use the traceback module to get a full stack trace without removing try/catch blocks
#import traceback
#traceback.print_exc()

import time
import random
import argparse
import json
import base64
import os

__version__ = "20180917"
## Functions

def doSleep(timing):
    if timing == 0:
        time.sleep(random.randrange(90,120))
    elif timing == 1:
        time.sleep(random.randrange(60,90))
    elif timing == 2:
        time.sleep(random.randrange(30,60))
    elif timing == 3:
        time.sleep(random.randrange(10,20))
    elif timing == 4:
        time.sleep(random.randrange(5,10))
    # There's no elif timing == 5 here because we don't want to sleep for -t 5
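# Example: doSleep(3) pauses for a random 10-20 seconds between requests,
# while doSleep(5) falls through every branch and returns without sleeping.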
def checkBluecoat(domain):
    try:
        url = 'https://sitereview.bluecoat.com/resource/lookup'
        postData = {'url':domain,'captcha':''}
        headers = {'User-Agent':useragent,
                   'Accept':'application/json, text/plain, */*',
                   'Content-Type':'application/json; charset=UTF-8',
                   'Referer':'https://sitereview.bluecoat.com/lookup'}

        print('[*] BlueCoat: {}'.format(domain))
        response = s.post(url,headers=headers,json=postData,verify=False)
        responseJSON = json.loads(response.text)

        if 'errorType' in responseJSON:
            a = responseJSON['errorType']
        else:
            a = responseJSON['categorization'][0]['name']

        # Print a notice if CAPTCHAs are blocking accurate results and attempt to solve if --ocr
        if a == 'captcha':
            if ocr:
                # This request is also performed by a browser, but is not needed for our purposes
                #captcharequestURL = 'https://sitereview.bluecoat.com/resource/captcha-request'

                print('[*] Received CAPTCHA challenge!')
                captcha = solveCaptcha('https://sitereview.bluecoat.com/resource/captcha.jpg',s)

                if captcha:
                    b64captcha = base64.urlsafe_b64encode(captcha.encode('utf-8')).decode('utf-8')

                    # Send CAPTCHA solution via GET since inclusion with the domain categorization request doesn't work anymore
                    captchasolutionURL = 'https://sitereview.bluecoat.com/resource/captcha-request/{0}'.format(b64captcha)
                    print('[*] Submitting CAPTCHA at {0}'.format(captchasolutionURL))
                    response = s.get(url=captchasolutionURL,headers=headers,verify=False)

                    # Try the categorization request again
                    response = s.post(url,headers=headers,json=postData,verify=False)
                    responseJSON = json.loads(response.text)

                    if 'errorType' in responseJSON:
                        a = responseJSON['errorType']
                    else:
                        a = responseJSON['categorization'][0]['name']
                else:
                    print('[-] Error: Failed to solve BlueCoat CAPTCHA with OCR! Manually solve at "https://sitereview.bluecoat.com/sitereview.jsp"')
            else:
                print('[-] Error: BlueCoat CAPTCHA received. Try the --ocr flag or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')

        return a

    except Exception as e:
        print('[-] Error retrieving Bluecoat reputation! {0}'.format(e))
        return "-"
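# A sketch of the Bluecoat JSON shapes the parser above assumes (values are
# hypothetical): a successful lookup looks like
#   {"categorization": [{"name": "Technology/Internet"}]}
# while an error or CAPTCHA challenge looks like
#   {"errorType": "captcha"}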
def checkIBMXForce(domain):
    try:
        url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
        headers = {'User-Agent':useragent,
                   'Accept':'application/json, text/plain, */*',
                   'x-ui':'XFE',
                   'Origin':url,
                   'Referer':url}

        print('[*] IBM xForce: {}'.format(domain))

        url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
        response = s.get(url,headers=headers,verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = responseJSON['error']
        elif not responseJSON['result']['cats']:
            a = 'Uncategorized'
            ## TO-DO - Add a notice when the "intrusion" category is returned, as it indicates the endpoint's rate limit / brute-force protection was triggered
        else:
            categories = ''
            # Parse all dictionary keys and append to a single string to get the category names
            for key in responseJSON["result"]['cats']:
                categories += '{0}, '.format(str(key))

            a = '{0}(Score: {1})'.format(categories,str(responseJSON['result']['score']))

        return a

    except:
        print('[-] Error retrieving IBM X-Force reputation!')
        return "-"
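# A sketch of the X-Force JSON shapes the parser above assumes (values are
# hypothetical): a successful lookup looks like
#   {"result": {"cats": {"Software / Hardware": true}, "score": 1}}
# while an error looks like
#   {"error": "Not found."}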
def checkTalos(domain):
    url = 'https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc'.format(domain)
    headers = {'User-Agent':useragent,
               'Referer':url}

    print('[*] Cisco Talos: {}'.format(domain))
    try:
        response = s.get(url,headers=headers,verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = str(responseJSON['error'])
            if a == "Unfortunately, we can't find any results for your search.":
                a = 'Uncategorized'
        elif responseJSON['category'] is None:
            a = 'Uncategorized'
        else:
            a = '{0} (Score: {1})'.format(str(responseJSON['category']['description']), str(responseJSON['web_score_name']))

        return a

    except:
        print('[-] Error retrieving Talos reputation!')
        return "-"
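# A sketch of the Talos JSON shapes the parser above assumes (values are
# hypothetical): a successful lookup looks like
#   {"category": {"description": "Computers and Internet"}, "web_score_name": "Neutral"}
# while a miss looks like
#   {"error": "Unfortunately, we can't find any results for your search."}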
def checkMXToolbox(domain):
    url = 'https://mxtoolbox.com/Public/Tools/BrandReputation.aspx'
    headers = {'User-Agent':useragent,
               'Origin':url,
               'Referer':url}

    print('[*] Google SafeBrowsing and PhishTank: {}'.format(domain))
    try:
        response = s.get(url=url, headers=headers)
        soup = BeautifulSoup(response.content,'lxml')

        viewstate = soup.select('input[name=__VIEWSTATE]')[0]['value']
        viewstategenerator = soup.select('input[name=__VIEWSTATEGENERATOR]')[0]['value']
        eventvalidation = soup.select('input[name=__EVENTVALIDATION]')[0]['value']

        data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": viewstate,
            "__VIEWSTATEGENERATOR": viewstategenerator,
            "__EVENTVALIDATION": eventvalidation,
            "ctl00$ContentPlaceHolder1$brandReputationUrl": domain,
            "ctl00$ContentPlaceHolder1$brandReputationDoLookup": "Brand Reputation Lookup",
            "ctl00$ucSignIn$hfRegCode": 'missing',
            "ctl00$ucSignIn$hfRedirectSignUp": '/Public/Tools/BrandReputation.aspx',
            "ctl00$ucSignIn$hfRedirectLogin": '',
            "ctl00$ucSignIn$txtEmailAddress": '',
            "ctl00$ucSignIn$cbNewAccount": 'cbNewAccount',
            "ctl00$ucSignIn$txtFullName": '',
            "ctl00$ucSignIn$txtModalNewPassword": '',
            "ctl00$ucSignIn$txtPhone": '',
            "ctl00$ucSignIn$txtCompanyName": '',
            "ctl00$ucSignIn$drpTitle": '',
            "ctl00$ucSignIn$txtTitleName": '',
            "ctl00$ucSignIn$txtModalPassword": ''
        }

        response = s.post(url=url, headers=headers, data=data)
        soup = BeautifulSoup(response.content,'lxml')

        a = ''
        if soup.select('div[id=ctl00_ContentPlaceHolder1_noIssuesFound]'):
            a = 'No issues found'
            return a
        else:
            if soup.select('div[id=ctl00_ContentPlaceHolder1_googleSafeBrowsingIssuesFound]'):
                a = 'Google SafeBrowsing Issues Found. '
            if soup.select('div[id=ctl00_ContentPlaceHolder1_phishTankIssuesFound]'):
                a += 'PhishTank Issues Found'
            return a

    except Exception as e:
        print('[-] Error retrieving Google SafeBrowsing and PhishTank reputation! {0}'.format(e))
        return "-"
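# Note: the GET-then-POST dance above exists because the page is ASP.NET
# WebForms; the hidden __VIEWSTATE, __VIEWSTATEGENERATOR, and __EVENTVALIDATION
# fields scraped from the initial GET generally must be echoed back in the POST
# or the lookup request is rejected by the server.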
def downloadMalwareDomains(malwaredomainsURL):
    url = malwaredomainsURL
    response = s.get(url=url,headers=headers,verify=False)
    responseText = response.text
    if response.status_code == 200:
        return responseText
    else:
        print("[-] Error reaching: {} Status: {}".format(url, response.status_code))
def checkDomain(domain):
    print('[*] Fetching domain reputation for: {}'.format(domain))

    if domain in maldomainsList:
        print("[!] {}: Identified as known malware domain (malwaredomains.com)".format(domain))

    bluecoat = checkBluecoat(domain)
    print("[+] {}: {}".format(domain, bluecoat))

    ibmxforce = checkIBMXForce(domain)
    print("[+] {}: {}".format(domain, ibmxforce))

    ciscotalos = checkTalos(domain)
    print("[+] {}: {}".format(domain, ciscotalos))

    mxtoolbox = checkMXToolbox(domain)
    print("[+] {}: {}".format(domain, mxtoolbox))

    print("")

    results = [domain,bluecoat,ibmxforce,ciscotalos,mxtoolbox]
    return results
def solveCaptcha(url,session):
    # Downloads CAPTCHA image and saves to current directory for OCR with tesseract
    # Returns CAPTCHA string or False if an error occurred
    jpeg = 'captcha.jpg'
    try:
        response = session.get(url=url,headers=headers,verify=False, stream=True)
        if response.status_code == 200:
            with open(jpeg, 'wb') as f:
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, f)
        else:
            print('[-] Error downloading CAPTCHA file!')
            return False

        # Perform basic OCR without additional image enhancement
        text = pytesseract.image_to_string(Image.open(jpeg))
        text = text.replace(" ", "")

        # Remove CAPTCHA file
        try:
            os.remove(jpeg)
        except OSError:
            pass

        return text

    except Exception as e:
        print("[-] Error solving CAPTCHA - {0}".format(e))
        return False
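# Usage sketch (hypothetical URL): solveCaptcha('https://example.com/captcha.jpg', s)
# returns the OCR'd text with spaces stripped, e.g. 'XK4D2', or False on failure.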
def drawTable(header,data):
    data.insert(0,header)
    t = Texttable(max_width=maxwidth)
    t.add_rows(data)
    t.header(header)
    return(t.draw())
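# Usage sketch (hypothetical data):
#   print(drawTable(['Domain', 'Birth'], [['example.com', '2005']]))
# renders an ASCII table wrapped to the --maxwidth setting.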
## MAIN
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
        epilog = '''
Examples:
./domainhunter.py -k apples -c --ocr -t5
./domainhunter.py --check --ocr -t3
./domainhunter.py --single mydomain.com
./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
./domainhunter.py --filename inputlist.txt --ocr --timing 5''',
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-a','--alexa', help='Filter results to Alexa listings', required=False, default=0, action='store_const', const=1)
    parser.add_argument('-k','--keyword', help='Keyword used to refine search results', required=False, default=False, type=str, dest='keyword')
    parser.add_argument('-c','--check', help='Perform domain reputation checks', required=False, default=False, action='store_true', dest='check')
    parser.add_argument('-f','--filename', help='Specify input file of line delimited domain names to check', required=False, default=False, type=str, dest='filename')
    parser.add_argument('--ocr', help='Perform OCR on CAPTCHAs when challenged', required=False, default=False, action='store_true')
    parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains', required=False, default=100, type=int, dest='maxresults')
    parser.add_argument('-s','--single', help='Performs detailed reputation checks against a single domain name/IP.', required=False, default=False, dest='single')
    parser.add_argument('-t','--timing', help='Modifies request timing to avoid CAPTCHAs. Slowest(0) = 90-120 seconds, Default(3) = 10-20 seconds, Fastest(5) = no delay', required=False, default=3, type=int, choices=range(0,6), dest='timing')
    parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, default=400, type=int, dest='maxwidth')
    parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()
    # Load dependent modules
    try:
        import requests
        from bs4 import BeautifulSoup
        from texttable import Texttable
    except Exception as e:
        print("Expired Domains Reputation Check")
        print("[-] Missing basic dependencies: {}".format(str(e)))
        print("[*] Install required dependencies by running `pip3 install -r requirements.txt`")
        quit(0)
    # Load OCR related modules if the --ocr flag is set since these can be difficult to get working
    if args.ocr:
        try:
            import pytesseract
            from PIL import Image
            import shutil
        except Exception as e:
            print("Expired Domains Reputation Check")
            print("[-] Missing OCR dependencies: {}".format(str(e)))
            print("[*] Install required Python dependencies by running: pip3 install -r requirements.txt")
            print("[*] Ubuntu/Debian - Install tesseract by running: apt-get install tesseract-ocr python3-imaging")
            print("[*] macOS - Install tesseract with homebrew by running: brew install tesseract")
            quit(0)
    ## Variables
    alexa = args.alexa
    keyword = args.keyword
    check = args.check
    filename = args.filename
    maxresults = args.maxresults
    single = args.single
    timing = args.timing
    maxwidth = args.maxwidth
    ocr = args.ocr

    malwaredomainsURL = 'http://mirror1.malwaredomains.com/files/justdomains'
    expireddomainsqueryURL = 'https://www.expireddomains.net/domain-name-search'

    timestamp = time.strftime("%Y%m%d_%H%M%S")

    useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
    headers = {'User-Agent':useragent}

    requests.packages.urllib3.disable_warnings()

    # HTTP Session container, used to manage cookies, session tokens and other session information
    s = requests.Session()
    title = '''
 ____   ___  __  __    _    ___ _   _   _   _ _   _ _   _ _____ _____ ____  
|  _ \ / _ \|  \/  |  / \  |_ _| \ | | | | | | | | \ | |_   _| ____|  _ \ 
| | | | | | | |\/| | / _ \  | ||  \| | | |_| | | | |  \| | | | |  _| | |_) |
| |_| | |_| | |  | |/ ___ \ | || |\  | |  _  | |_| | |\  | | | | |___|  _ < 
|____/ \___/|_|  |_/_/   \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\ '''

    print(title)
    print("")
    print("Expired Domains Reputation Checker")
    print("Authors: @joevest and @andrewchiles\n")
    print("DISCLAIMER: This is for educational purposes only!")
    disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
If you plan to use this content for illegal purposes, don't. Have a nice day :)'''
    print(disclaimer)
    print("")
    # Download known malware domains
    print('[*] Downloading malware domain list from {}\n'.format(malwaredomainsURL))
    maldomains = downloadMalwareDomains(malwaredomainsURL)
    maldomainsList = maldomains.split("\n")

    # Retrieve reputation for a single chosen domain (Quick Mode)
    if single:
        checkDomain(single)
        exit(0)
    # Perform detailed domain reputation checks against input file, print table, and quit
    if filename:
        # Initialize the list that will hold one result row per domain
        data = []
        try:
            with open(filename, 'r') as domainsList:
                for line in domainsList.read().splitlines():
                    data.append(checkDomain(line))
                    doSleep(timing)

                # Print results table
                header = ['Domain', 'BlueCoat', 'IBM X-Force', 'Cisco Talos', 'MXToolbox']
                print(drawTable(header,data))

        except KeyboardInterrupt:
            print('Caught keyboard interrupt. Exiting!')
            exit(0)
        except Exception as e:
            print('[-] Error: {}'.format(e))
            exit(1)
        exit(0)
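    # Expected input file format (hypothetical example): one domain per line, e.g.
    #   example.com
    #   example.net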
    # Generic Proxy support
    # TODO: add as a parameter
    proxies = {
        'http': 'http://127.0.0.1:8080',
        'https': 'http://127.0.0.1:8080',
    }

    # Create an initial session
    domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False)

    # Use a proxy like Burp for debugging request/parsing errors
    #domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)

    # Lists for our ExpiredDomains results
    domain_list = []
    data = []
    # Generate list of URLs to query for expired/deleted domains
    urls = []

    # Use the keyword string to narrow the domain search if provided. This generates a list of URLs to query
    if keyword:
        print('[*] Fetching expired or deleted domains containing "{}"'.format(keyword))
        for i in range(0,maxresults,25):
            if i == 0:
                urls.append("{}/?q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL,keyword,alexa))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(keyword)
            else:
                urls.append("{}/?start={}&q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL,i,keyword,alexa))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i-25),keyword)

    # If no keyword provided, generate a list of recently expired domain URLs (batches of 25 results)
    else:
        print('[*] Fetching expired or deleted domains...')

        # Calculate the number of URLs to request since we're performing a request for two different resources instead of one
        numresults = int(maxresults / 2)
        for i in range(0,(numresults),25):
            urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa={}'.format(i,alexa))
            urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa={}'.format(i,alexa))
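    # Example URLs produced above (hypothetical keyword "tech", default falexa=0):
    #   https://www.expireddomains.net/domain-name-search/?q=tech&fwhois=22&falexa=0
    #   https://www.expireddomains.net/backorder-expired-domains?start=0&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa=0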
    for url in urls:
        print("[*] {}".format(url))

        # Annoyingly, when querying specific keywords the expireddomains.net site requires additional cookies
        # which are set in JavaScript and not recognized by Requests, so we add them here manually.
        # May not be needed, but the _pk_id.10.dd0a cookie only requires a single . to be successful.
        # In order to somewhat match a real cookie, but still be different, random integers are introduced

        r1 = random.randint(100000,999999)

        # Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
        pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)

        jar = requests.cookies.RequestsCookieJar()
        jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
        jar.set('_pk_id.10.dd0a', pk_str, domain='expireddomains.net', path='/')

        domainrequest = s.get(url,headers=headers,verify=False,cookies=jar)
        #domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)

        domains = domainrequest.text

        # Turn the HTML into a Beautiful Soup object
        soup = BeautifulSoup(domains, 'lxml')
        #print(soup)
        try:
            table = soup.find("table")
            rows = table.findAll('tr')[1:]

            for row in rows:
                # Alternative way to extract domain name
                # domain = row.find('td').find('a').text

                cells = row.findAll("td")

                if len(cells) >= 1:
                    if keyword:
                        c0 = row.find('td').find('a').text  # domain
                        c1 = cells[1].find(text=True)       # bl
                        c2 = cells[2].find(text=True)       # domainpop
                        c3 = cells[3].find(text=True)       # birth
                        c4 = cells[4].find(text=True)       # Archive.org entries
                        c5 = cells[5].find(text=True)       # Alexa
                        c6 = cells[6].find(text=True)       # Dmoz.org
                        c7 = cells[7].find(text=True)       # status com
                        c8 = cells[8].find(text=True)       # status net
                        c9 = cells[9].find(text=True)       # status org
                        c10 = cells[10].find(text=True)     # status de
                        c11 = cells[11].find(text=True)     # TLDs
                        c12 = cells[12].find(text=True)     # RDT
                        c13 = cells[13].find(text=True)     # List
                        c14 = cells[14].find(text=True)     # Status
                        c15 = ""                            # Links

                        # Create the list of available TLDs
                        available = ''
                        if c7 == "available":
                            available += ".com "
                        if c8 == "available":
                            available += ".net "
                        if c9 == "available":
                            available += ".org "
                        if c10 == "available":
                            available += ".de "

                        # Only grab status for keyword searches since it doesn't exist otherwise
                        status = ""
                        if keyword:
                            status = c14

                        # Only add Expired, not Pending, Backorder, etc
                        if c13 == "Expired":
                            # Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
                            #if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
                            #    domain_list.append([c0,c3,c4,available,status])

                            # Add other TLDs to the list if marked available
                            if (c7 == "available") and (c0 not in maldomainsList):
                                dom = c0.split(".")[0] + ".com"
                                domain_list.append([dom,c3,c4,available,status])
                            if (c8 == "available") and (c0 not in maldomainsList):
                                dom = c0.split(".")[0] + ".net"
                                domain_list.append([dom,c3,c4,available,status])
                            if (c9 == "available") and (c0 not in maldomainsList):
                                dom = c0.split(".")[0] + ".org"
                                domain_list.append([dom,c3,c4,available,status])
                            if (c10 == "available") and (c0 not in maldomainsList):
                                dom = c0.split(".")[0] + ".de"
                                domain_list.append([dom,c3,c4,available,status])
                    # Non-keyword search table format is slightly different
                    else:
                        c0 = cells[0].find(text=True)   # domain
                        c1 = cells[1].find(text=True)   # bl
                        c2 = cells[2].find(text=True)   # domainpop
                        c3 = cells[3].find(text=True)   # birth
                        c4 = cells[4].find(text=True)   # Archive.org entries
                        c5 = cells[5].find(text=True)   # Alexa
                        c6 = cells[6].find(text=True)   # Dmoz.org
                        c7 = cells[7].find(text=True)   # status com
                        c8 = cells[8].find(text=True)   # status net
                        c9 = cells[9].find(text=True)   # status org
                        c10 = cells[10].find(text=True) # status de
                        c11 = cells[11].find(text=True) # TLDs
                        c12 = cells[12].find(text=True) # RDT
                        c13 = cells[13].find(text=True) # End Date
                        c14 = cells[14].find(text=True) # Links

                        # Create the list of available TLDs
                        available = ''
                        if c7 == "available":
                            available += ".com "
                        if c8 == "available":
                            available += ".net "
                        if c9 == "available":
                            available += ".org "
                        if c10 == "available":
                            available += ".de "

                        status = ""

                        # Add other TLDs to the list if marked available
                        if (c7 == "available") and (c0 not in maldomainsList):
                            dom = c0.split(".")[0] + ".com"
                            domain_list.append([dom,c3,c4,available,status])
                        if (c8 == "available") and (c0 not in maldomainsList):
                            dom = c0.split(".")[0] + ".net"
                            domain_list.append([dom,c3,c4,available,status])
                        if (c9 == "available") and (c0 not in maldomainsList):
                            dom = c0.split(".")[0] + ".org"
                            domain_list.append([dom,c3,c4,available,status])
                        if (c10 == "available") and (c0 not in maldomainsList):
                            dom = c0.split(".")[0] + ".de"
                            domain_list.append([dom,c3,c4,available,status])

                        # Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
                        #if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
                        #    domain_list.append([c0,c3,c4,available,status])

        except Exception as e:
            print("[!] Error: ", e)

        # Add additional sleep on requests to ExpiredDomains.net to avoid errors
        time.sleep(5)
    # Check for valid list results before continuing
    if len(domain_list) == 0:
        print("[-] No domain results found or none are currently available for purchase!")
        exit(0)
    else:
        if check:
            print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))

        # Dedupe the domain list while preserving order
        domain_list_unique = []
        for item in domain_list:
            if item not in domain_list_unique:
                domain_list_unique.append(item)

        index = 1
        for domain_entry in domain_list_unique:
            domain = domain_entry[0]
            birthdate = domain_entry[1]
            archiveentries = domain_entry[2]
            availabletlds = domain_entry[3]
            status = domain_entry[4]
            bluecoat = ''
            ibmxforce = ''
            ciscotalos = ''

            # Perform domain reputation checks
            if check:
                print("[*] Domain {} of {}".format(str(index),str(len(domain_list_unique))))
                bluecoat = checkBluecoat(domain)
                print("[+] {}: {}".format(domain, bluecoat))
                ibmxforce = checkIBMXForce(domain)
                print("[+] {}: {}".format(domain, ibmxforce))
                ciscotalos = checkTalos(domain)
                print("[+] {}: {}".format(domain, ciscotalos))

                # Sleep to avoid CAPTCHAs
                doSleep(timing)

            # Mark reputation checks as skipped if the -c flag is not present
            else:
                bluecoat = '-'
                ibmxforce = '-'
                ciscotalos = '-'

            # Append the entry to the results list unless every service reports an unusable categorization
            if not ((bluecoat in ('Uncategorized','badurl','Suspicious','Malicious Sources/Malnets','captcha','Phishing')) and ibmxforce == "Not found." and ciscotalos == "Uncategorized"):
                data.append([domain,birthdate,archiveentries,availabletlds,status,bluecoat,ibmxforce,ciscotalos])

            index += 1
        # Sort domain list by column 2 (Birth Year)
        sortedDomains = sorted(data, key=lambda x: x[1], reverse=True)

        if len(sortedDomains) == 0:
            print("\n[-] No domains discovered with a desirable categorization!")
            exit(0)
        else:
            print("\n[*] {} of {} domains discovered with a potentially desirable categorization!".format(len(sortedDomains),len(domain_list)))
            # Build HTML Table
            html = ''
            htmlHeader = '<html><head><title>Expired Domain List</title></head>'
            htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
            htmlTableHeader = '''
            <table border="1" align="center">
                <th>Domain</th>
                <th>Birth</th>
                <th>Entries</th>
                <th>TLDs Available</th>
                <th>Status</th>
                <th>BlueCoat</th>
                <th>IBM X-Force</th>
                <th>Cisco Talos</th>
                <th>WatchGuard</th>
                <th>Namecheap</th>
                <th>Archive.org</th>
            '''

            htmlTableBody = ''
            htmlTableFooter = '</table>'
            htmlFooter = '</body></html>'

            # Build HTML table contents
            for i in sortedDomains:
                htmlTableBody += '<tr>'
                htmlTableBody += '<td>{}</td>'.format(i[0]) # Domain
                htmlTableBody += '<td>{}</td>'.format(i[1]) # Birth
                htmlTableBody += '<td>{}</td>'.format(i[2]) # Entries
                htmlTableBody += '<td>{}</td>'.format(i[3]) # TLDs
                htmlTableBody += '<td>{}</td>'.format(i[4]) # Status
                htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/" target="_blank">{}</a></td>'.format(i[5]) # Bluecoat
                htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">{}</a></td>'.format(i[0],i[6]) # IBM X-Force Categorization
                htmlTableBody += '<td><a href="https://www.talosintelligence.com/reputation_center/lookup?search={}" target="_blank">{}</a></td>'.format(i[0],i[7]) # Cisco Talos
                htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0]) # Borderware WatchGuard
                htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0]) # Namecheap
                htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0]) # Archive.org
                htmlTableBody += '</tr>'

            html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter

            logfilename = "{}_domainreport.html".format(timestamp)
            log = open(logfilename,'w')
            log.write(html)
            log.close()

            print("\n[*] Search complete")
            print("[*] Log written to {}\n".format(logfilename))

            # Print Text Table
            header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BlueCoat', 'IBM', 'Cisco Talos']
            print(drawTable(header,sortedDomains))