domainhunter.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826
  1. #!/usr/bin/env python
  2. ## Title: domainhunter.py
  3. ## Author: @joevest and @andrewchiles
  4. ## Description: Checks expired domains, reputation/categorization, and Archive.org history to determine
  5. ## good candidates for phishing and C2 domain names
  6. # If the expected response format from a provider changes, use the traceback module to get a full stack trace without removing try/catch blocks
  7. #import traceback
  8. #traceback.print_exc()
  9. import time
  10. import random
  11. import argparse
  12. import json
  13. import base64
  14. import os
  15. import sys
  16. from urllib.parse import urlparse
  17. import getpass
  18. # Bluecoat XSRF
  19. from hashlib import sha256
  20. __version__ = "20221025"
  21. ## Functions
  22. def doSleep(timing):
  23. """Add nmap like random sleep interval for multiple requests"""
  24. if timing == 0:
  25. time.sleep(random.randrange(90,120))
  26. elif timing == 1:
  27. time.sleep(random.randrange(60,90))
  28. elif timing == 2:
  29. time.sleep(random.randrange(30,60))
  30. elif timing == 3:
  31. time.sleep(random.randrange(10,20))
  32. elif timing == 4:
  33. time.sleep(random.randrange(5,10))
  34. # There's no elif timing == 5 here because we don't want to sleep for -t 5
  35. def checkUmbrella(domain):
  36. """Umbrella Domain reputation service"""
  37. try:
  38. url = 'https://investigate.api.umbrella.com/domains/categorization/?showLabels'
  39. postData = [domain]
  40. headers = {
  41. 'User-Agent':useragent,
  42. 'Content-Type':'application/json; charset=UTF-8',
  43. 'Authorization': 'Bearer {}'.format(umbrella_apikey)
  44. }
  45. print('[*] Umbrella: {}'.format(domain))
  46. response = s.post(url,headers=headers,json=postData,verify=False,proxies=proxies)
  47. responseJSON = json.loads(response.text)
  48. if len(responseJSON[domain]['content_categories']) > 0:
  49. return responseJSON[domain]['content_categories'][0]
  50. else:
  51. return 'Uncategorized'
  52. except Exception as e:
  53. print('[-] Error retrieving Umbrella reputation! {0}'.format(e))
  54. return "error"
def checkBluecoat(domain):
    """Symantec Sitereview Domain Reputation.

    Establishes a browsing session to obtain the XSRF token cookie, derives
    the anti-scripting 'key'/'phrase' SHA-256 values the site expects, then
    submits the categorization lookup. When the site answers with a CAPTCHA
    and --ocr is enabled, attempts to solve it via solveCaptcha() and retries
    the lookup once.

    Returns the category name, the site's errorType string, an HTTP error
    notice, or "error" on any exception.
    """
    try:
        headers = {
            'User-Agent':useragent,
            'Referer':'http://sitereview.bluecoat.com/'}

        # Establish our session information (cookies, XSRF token)
        response = s.get("https://sitereview.bluecoat.com/",headers=headers,verify=False,proxies=proxies)
        response = s.head("https://sitereview.bluecoat.com/resource/captcha-request",headers=headers,verify=False,proxies=proxies)

        # Pull the XSRF Token from the cookie jar
        session_cookies = s.cookies.get_dict()
        if "XSRF-TOKEN" in session_cookies:
            token = session_cookies["XSRF-TOKEN"]
        else:
            raise NameError("No XSRF-TOKEN found in the cookie jar")

        # Perform SiteReview lookup
        # BlueCoat Added base64 encoded phrases selected at random and sha256 hashing of the JSESSIONID
        # (each phrase decodes to an anti-automation warning message from the site)
        phrases = [
            'UGxlYXNlIGRvbid0IGZvcmNlIHVzIHRvIHRha2UgbWVhc3VyZXMgdGhhdCB3aWxsIG1ha2UgaXQgbW9yZSBkaWZmaWN1bHQgZm9yIGxlZ2l0aW1hdGUgdXNlcnMgdG8gbGV2ZXJhZ2UgdGhpcyBzZXJ2aWNlLg==',
            'SWYgeW91IGNhbiByZWFkIHRoaXMsIHlvdSBhcmUgbGlrZWx5IGFib3V0IHRvIGRvIHNvbWV0aGluZyB0aGF0IGlzIGFnYWluc3Qgb3VyIFRlcm1zIG9mIFNlcnZpY2U=',
            'RXZlbiBpZiB5b3UgYXJlIG5vdCBwYXJ0IG9mIGEgY29tbWVyY2lhbCBvcmdhbml6YXRpb24sIHNjcmlwdGluZyBhZ2FpbnN0IFNpdGUgUmV2aWV3IGlzIHN0aWxsIGFnYWluc3QgdGhlIFRlcm1zIG9mIFNlcnZpY2U=',
            'U2NyaXB0aW5nIGFnYWluc3QgU2l0ZSBSZXZpZXcgaXMgYWdhaW5zdCB0aGUgU2l0ZSBSZXZpZXcgVGVybXMgb2YgU2VydmljZQ=='
        ]

        # New Bluecoat XSRF Code added May 2022 thanks to @froyo75
        # A random dash-separated piece of the token salts both hashes.
        xsrf_token_parts = token.split('-')
        xsrf_random_part = random.choice(xsrf_token_parts)
        key_data = xsrf_random_part + ': ' + token
        # Key used as part of POST data
        key = sha256(key_data.encode('utf-8')).hexdigest()
        random_phrase = base64.b64decode(random.choice(phrases)).decode('utf-8')
        phrase_data = xsrf_random_part + ': ' + random_phrase
        # Phrase used as part of POST data
        phrase = sha256(phrase_data.encode('utf-8')).hexdigest()

        postData = {
            'url':domain,
            'captcha':'',
            'key':key,
            'phrase':phrase, # Pick a random base64 phrase from the list
            'source':'new-lookup'}

        headers = {'User-Agent':useragent,
                   'Accept':'application/json, text/plain, */*',
                   'Accept-Language':'en_US',
                   'Content-Type':'application/json; charset=UTF-8',
                   'X-XSRF-TOKEN':token,
                   'Referer':'http://sitereview.bluecoat.com/'}

        print('[*] BlueCoat: {}'.format(domain))
        response = s.post('https://sitereview.bluecoat.com/resource/lookup',headers=headers,json=postData,verify=False,proxies=proxies)

        # Check for any HTTP errors
        if response.status_code != 200:
            a = "HTTP Error ({}-{}) - Is your IP blocked?".format(response.status_code,response.reason)
        else:
            responseJSON = json.loads(response.text)
            if 'errorType' in responseJSON:
                a = responseJSON['errorType']
            else:
                a = responseJSON['categorization'][0]['name']

            # Print notice if CAPTCHAs are blocking accurate results and attempt to solve if --ocr
            if a == 'captcha':
                if ocr:
                    # This request is also performed by a browser, but is not needed for our purposes
                    #captcharequestURL = 'https://sitereview.bluecoat.com/resource/captcha-request'
                    print('[*] Received CAPTCHA challenge!')
                    captcha = solveCaptcha('https://sitereview.bluecoat.com/resource/captcha.jpg',s)
                    if captcha:
                        # Solution must be submitted base64url-encoded in the path
                        b64captcha = base64.urlsafe_b64encode(captcha.encode('utf-8')).decode('utf-8')
                        # Send CAPTCHA solution via GET since inclusion with the domain categorization request doesn't work anymore
                        captchasolutionURL = 'https://sitereview.bluecoat.com/resource/captcha-request/{0}'.format(b64captcha)
                        print('[*] Submiting CAPTCHA at {0}'.format(captchasolutionURL))
                        response = s.get(url=captchasolutionURL,headers=headers,verify=False,proxies=proxies)

                        # Try the categorization request again
                        response = s.post('https://sitereview.bluecoat.com/resource/lookup',headers=headers,json=postData,verify=False,proxies=proxies)
                        responseJSON = json.loads(response.text)
                        if 'errorType' in responseJSON:
                            a = responseJSON['errorType']
                        else:
                            a = responseJSON['categorization'][0]['name']
                    else:
                        print('[-] Error: Failed to solve BlueCoat CAPTCHA with OCR! Manually solve at "https://sitereview.bluecoat.com/sitereview.jsp"')
                else:
                    print('[-] Error: BlueCoat CAPTCHA received. Try --ocr flag or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')
        return a
    except Exception as e:
        print('[-] Error retrieving Bluecoat reputation! {0}'.format(e))
        return "error"
  139. def checkIBMXForce(domain):
  140. """IBM XForce Domain Reputation"""
  141. try:
  142. url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
  143. headers = {'User-Agent':useragent,
  144. 'Accept':'application/json, text/plain, */*',
  145. 'x-ui':'XFE',
  146. 'Origin':url,
  147. 'Referer':url}
  148. print('[*] IBM xForce: {}'.format(domain))
  149. url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
  150. response = s.get(url,headers=headers,verify=False,proxies=proxies)
  151. responseJSON = json.loads(response.text)
  152. if 'error' in responseJSON:
  153. a = responseJSON['error']
  154. elif not responseJSON['result']['cats']:
  155. a = 'Uncategorized'
  156. ## TO-DO - Add noticed when "intrusion" category is returned. This is indication of rate limit / brute-force protection hit on the endpoint
  157. else:
  158. categories = ''
  159. # Parse all dictionary keys and append to single string to get Category names
  160. for key in responseJSON['result']['cats']:
  161. categories += '{0}, '.format(str(key))
  162. a = '{0}(Score: {1})'.format(categories,str(responseJSON['result']['score']))
  163. return a
  164. except Exception as e:
  165. print('[-] Error retrieving IBM-Xforce reputation! {0}'.format(e))
  166. return "error"
  167. def checkTalos(domain):
  168. """Cisco Talos Domain Reputation"""
  169. url = 'https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc'.format(domain)
  170. headers = {'User-Agent':useragent,
  171. 'Referer':url}
  172. print('[*] Cisco Talos: {}'.format(domain))
  173. try:
  174. response = s.get(url,headers=headers,verify=False,proxies=proxies)
  175. responseJSON = json.loads(response.text)
  176. if 'error' in responseJSON:
  177. a = str(responseJSON['error'])
  178. if a == "Unfortunately, we can't find any results for your search.":
  179. a = 'Uncategorized'
  180. elif responseJSON['category'] is None:
  181. a = 'Uncategorized'
  182. else:
  183. a = '{0} (Score: {1})'.format(str(responseJSON['category']['description']), str(responseJSON['web_score_name']))
  184. return a
  185. except Exception as e:
  186. print('[-] Error retrieving Talos reputation! {0}'.format(e))
  187. return "error"
  188. def checkMcAfeeWG(domain):
  189. """McAfee Web Gateway Domain Reputation"""
  190. try:
  191. print('[*] McAfee Web Gateway (Cloud): {}'.format(domain))
  192. # HTTP Session container, used to manage cookies, session tokens and other session information
  193. s = requests.Session()
  194. headers = {
  195. 'User-Agent':useragent,
  196. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  197. 'Accept-Language': 'en-US,en;q=0.5',
  198. 'Accept-Encoding': 'gzip, deflate',
  199. 'Referer':'https://sitelookup.mcafee.com/'
  200. }
  201. # Establish our session information
  202. response = s.get("https://sitelookup.mcafee.com",headers=headers,verify=False,proxies=proxies)
  203. # Pull the hidden attributes from the response
  204. soup = BeautifulSoup(response.text,"html.parser")
  205. hidden_tags = soup.find_all("input", {"type": "hidden"})
  206. for tag in hidden_tags:
  207. if tag['name'] == 'sid':
  208. sid = tag['value']
  209. elif tag['name'] == 'e':
  210. e = tag['value']
  211. elif tag['name'] == 'c':
  212. c = tag['value']
  213. elif tag['name'] == 'p':
  214. p = tag['value']
  215. # Retrieve the categorization infos
  216. multipart_form_data = {
  217. 'sid': (None, sid),
  218. 'e': (None, e),
  219. 'c': (None, c),
  220. 'p': (None, p),
  221. 'action': (None, 'checksingle'),
  222. 'product': (None, '14-ts'),
  223. 'url': (None, domain)
  224. }
  225. response = s.post('https://sitelookup.mcafee.com/en/feedback/url',headers=headers,files=multipart_form_data,verify=False,proxies=proxies)
  226. if response.status_code == 200:
  227. soup = BeautifulSoup(response.text,"html.parser")
  228. for table in soup.findAll("table", {"class": ["result-table"]}):
  229. datas = table.find_all('td')
  230. if "not valid" in datas[2].text:
  231. a = 'Uncategorized'
  232. else:
  233. status = datas[2].text
  234. category = (datas[3].text[1:]).strip().replace('-',' -')
  235. web_reputation = datas[4].text
  236. a = '{0}, Status: {1}, Web Reputation: {2}'.format(category,status,web_reputation)
  237. return a
  238. else:
  239. raise Exception
  240. except Exception as e:
  241. print('[-] Error retrieving McAfee Web Gateway Domain Reputation!')
  242. return "error"
  243. def downloadMalwareDomains(malwaredomainsURL):
  244. """Downloads a current list of known malicious domains"""
  245. url = malwaredomainsURL
  246. response = s.get(url=url,headers=headers,verify=False,proxies=proxies)
  247. responseText = response.text
  248. if response.status_code == 200:
  249. return responseText
  250. else:
  251. print("[-] Error reaching:{} Status: {}").format(url, response.status_code)
  252. def checkDomain(domain):
  253. """Executes various domain reputation checks included in the project"""
  254. print('[*] Fetching domain reputation for: {}'.format(domain))
  255. if domain in maldomainsList:
  256. print("[!] {}: Identified as known malware domain (malwaredomains.com)".format(domain))
  257. bluecoat = checkBluecoat(domain)
  258. print("[+] {}: {}".format(domain, bluecoat))
  259. ibmxforce = checkIBMXForce(domain)
  260. print("[+] {}: {}".format(domain, ibmxforce))
  261. ciscotalos = checkTalos(domain)
  262. print("[+] {}: {}".format(domain, ciscotalos))
  263. umbrella = "not available"
  264. if len(umbrella_apikey):
  265. umbrella = checkUmbrella(domain)
  266. print("[+] {}: {}".format(domain, umbrella))
  267. mcafeewg = checkMcAfeeWG(domain)
  268. print("[+] {}: {}".format(domain, mcafeewg))
  269. print("")
  270. results = [domain,bluecoat,ibmxforce,ciscotalos,umbrella,mcafeewg]
  271. return results
  272. def solveCaptcha(url,session):
  273. """Downloads CAPTCHA image and saves to current directory for OCR with tesseract"""
  274. jpeg = 'captcha.jpg'
  275. try:
  276. response = session.get(url=url,headers=headers,verify=False, stream=True,proxies=proxies)
  277. if response.status_code == 200:
  278. with open(jpeg, 'wb') as f:
  279. response.raw.decode_content = True
  280. shutil.copyfileobj(response.raw, f)
  281. else:
  282. print('[-] Error downloading CAPTCHA file!')
  283. return False
  284. # Perform basic OCR without additional image enhancement
  285. text = pytesseract.image_to_string(Image.open(jpeg))
  286. text = text.replace(" ", "").rstrip()
  287. # Remove CAPTCHA file
  288. try:
  289. os.remove(jpeg)
  290. except OSError:
  291. pass
  292. return text
  293. except Exception as e:
  294. print("[-] Error solving CAPTCHA - {0}".format(e))
  295. return False
  296. def drawTable(header,data):
  297. """Generates a text based table for printing to the console"""
  298. data.insert(0,header)
  299. t = Texttable(max_width=maxwidth)
  300. t.add_rows(data)
  301. t.header(header)
  302. return(t.draw())
  303. def loginExpiredDomains():
  304. """Login to the ExpiredDomains site with supplied credentials"""
  305. data = "login=%s&password=%s&redirect_2_url=/begin" % (username, password)
  306. headers["Content-Type"] = "application/x-www-form-urlencoded"
  307. r = s.post(expireddomainHost + "/login/", headers=headers, data=data, proxies=proxies, verify=False, allow_redirects=False)
  308. cookies = s.cookies.get_dict()
  309. if "location" in r.headers:
  310. if "/login/" in r.headers["location"]:
  311. print("[!] Login failed")
  312. sys.exit()
  313. if "ExpiredDomainssessid" in cookies:
  314. print("[+] Login successful. ExpiredDomainssessid: %s" % (cookies["ExpiredDomainssessid"]))
  315. else:
  316. print("[!] Login failed")
  317. sys.exit()
  318. def getIndex(cells, index):
  319. if cells[index].find("a") == None:
  320. return cells[index].text.strip()
  321. return cells[index].find("a").text.strip()
  322. ## MAIN
  323. if __name__ == "__main__":
  324. parser = argparse.ArgumentParser(
  325. description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
  326. epilog = '''
  327. Examples:
  328. ./domainhunter.py -k apples -c --ocr -t5
  329. ./domainhunter.py --check --ocr -t3
  330. ./domainhunter.py --single mydomain.com
  331. ./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
  332. ./domaihunter.py --filename inputlist.txt --ocr --timing 5''',
  333. formatter_class=argparse.RawDescriptionHelpFormatter)
  334. parser.add_argument('-a','--alexa', help='Filter results to Alexa listings', required=False, default=0, action='store_const', const=1)
  335. parser.add_argument('-k','--keyword', help='Keyword used to refine search results', required=False, default=False, type=str, dest='keyword')
  336. parser.add_argument('-c','--check', help='Perform domain reputation checks', required=False, default=False, action='store_true', dest='check')
  337. parser.add_argument('-f','--filename', help='Specify input file of line delimited domain names to check', required=False, default=False, type=str, dest='filename')
  338. parser.add_argument('--ocr', help='Perform OCR on CAPTCHAs when challenged', required=False, default=False, action='store_true')
  339. parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains', required=False, default=100, type=int, dest='maxresults')
  340. parser.add_argument('-s','--single', help='Performs detailed reputation checks against a single domain name/IP.', required=False, default=False, dest='single')
  341. parser.add_argument('-t','--timing', help='Modifies request timing to avoid CAPTCHAs. Slowest(0) = 90-120 seconds, Default(3) = 10-20 seconds, Fastest(5) = no delay', required=False, default=3, type=int, choices=range(0,6), dest='timing')
  342. parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, default=400, type=int, dest='maxwidth')
  343. parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
  344. parser.add_argument("-P", "--proxy", required=False, default=None, help="proxy. ex https://127.0.0.1:8080")
  345. parser.add_argument("-u", "--username", required=False, default=None, type=str, help="username for expireddomains.net")
  346. parser.add_argument("-p", "--password", required=False, default=None, type=str, help="password for expireddomains.net")
  347. parser.add_argument("-o", "--output", required=False, default=None, type=str, help="output file path")
  348. parser.add_argument('-ks','--keyword-start', help='Keyword starts with used to refine search results', required=False, default="", type=str, dest='keyword_start')
  349. parser.add_argument('-ke','--keyword-end', help='Keyword ends with used to refine search results', required=False, default="", type=str, dest='keyword_end')
  350. parser.add_argument('-um','--umbrella-apikey', help='API Key for umbrella (paid)', required=False, default="", type=str, dest='umbrella_apikey')
  351. parser.add_argument('-q','--quiet', help='Surpress initial ASCII art and header', required=False, default=False, action='store_true', dest='quiet')
  352. args = parser.parse_args()
  353. # Load dependent modules
  354. try:
  355. import requests
  356. from bs4 import BeautifulSoup
  357. from texttable import Texttable
  358. except Exception as e:
  359. print("Expired Domains Reputation Check")
  360. print("[-] Missing basic dependencies: {}".format(str(e)))
  361. print("[*] Install required dependencies by running `pip3 install -r requirements.txt`")
  362. quit(0)
  363. # Load OCR related modules if --ocr flag is set since these can be difficult to get working
  364. if args.ocr:
  365. try:
  366. import pytesseract
  367. from PIL import Image
  368. import shutil
  369. except Exception as e:
  370. print("Expired Domains Reputation Check")
  371. print("[-] Missing OCR dependencies: {}".format(str(e)))
  372. print("[*] Install required Python dependencies by running: pip3 install -r requirements.txt")
  373. print("[*] Ubuntu\Debian - Install tesseract by running: apt-get install tesseract-ocr python3-imaging")
  374. print("[*] macOS - Install tesseract with homebrew by running: brew install tesseract")
  375. quit(0)
  376. ## Variables
  377. username = args.username
  378. password = args.password
  379. proxy = args.proxy
  380. alexa = args.alexa
  381. keyword = args.keyword
  382. check = args.check
  383. filename = args.filename
  384. maxresults = args.maxresults
  385. single = args.single
  386. timing = args.timing
  387. maxwidth = args.maxwidth
  388. ocr = args.ocr
  389. output = args.output
  390. keyword_start = args.keyword_start
  391. keyword_end = args.keyword_end
  392. umbrella_apikey = args.umbrella_apikey
  393. malwaredomainsURL = 'https://gitlab.com/gerowen/old-malware-domains-ad-list/-/raw/master/malwaredomainslist.txt'
  394. expireddomainsqueryURL = 'https://www.expireddomains.net/domain-name-search'
  395. expireddomainHost = "https://member.expireddomains.net"
  396. timestamp = time.strftime("%Y%m%d_%H%M%S")
  397. useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
  398. headers = {'User-Agent':useragent}
  399. proxies = {}
  400. requests.packages.urllib3.disable_warnings()
  401. # HTTP Session container, used to manage cookies, session tokens and other session information
  402. s = requests.Session()
  403. if(args.proxy != None):
  404. proxy_parts = urlparse(args.proxy)
  405. proxies["http"] = "http://%s" % (proxy_parts.netloc)
  406. proxies["https"] = "https://%s" % (proxy_parts.netloc)
  407. s.proxies = proxies
  408. title = '''
  409. ____ ___ __ __ _ ___ _ _ _ _ _ _ _ _ _____ _____ ____
  410. | _ \ / _ \| \/ | / \ |_ _| \ | | | | | | | | | \ | |_ _| ____| _ \
  411. | | | | | | | |\/| | / _ \ | || \| | | |_| | | | | \| | | | | _| | |_) |
  412. | |_| | |_| | | | |/ ___ \ | || |\ | | _ | |_| | |\ | | | | |___| _ <
  413. |____/ \___/|_| |_/_/ \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\ '''
  414. # Print header
  415. if not (args.quiet):
  416. print(title)
  417. print('''\nExpired Domains Reputation Checker
  418. Authors: @joevest and @andrewchiles\n
  419. DISCLAIMER: This is for educational purposes only!
  420. It is designed to promote education and the improvement of computer/cyber security.
  421. The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
  422. If you plan to use this content for illegal purpose, don't. Have a nice day :)\n''')
  423. # Download known malware domains
  424. # print('[*] Downloading malware domain list from {}\n'.format(malwaredomainsURL))
  425. maldomains = downloadMalwareDomains(malwaredomainsURL)
  426. maldomainsList = maldomains.split("\n")
  427. # Retrieve reputation for a single choosen domain (Quick Mode)
  428. if single:
  429. checkDomain(single)
  430. exit(0)
  431. # Perform detailed domain reputation checks against input file, print table, and quit. This does not generate an HTML report
  432. if filename:
  433. # Initialize our list with an empty row for the header
  434. data = []
  435. try:
  436. with open(filename, 'r') as domainsList:
  437. for line in domainsList.read().splitlines():
  438. data.append(checkDomain(line))
  439. doSleep(timing)
  440. # Print results table
  441. header = ['Domain', 'BlueCoat', 'IBM X-Force', 'Cisco Talos', 'Umbrella', 'McAfee Web Gateway (Cloud)']
  442. print(drawTable(header,data))
  443. except KeyboardInterrupt:
  444. print('Caught keyboard interrupt. Exiting!')
  445. exit(0)
  446. except Exception as e:
  447. print('[-] Error: {}'.format(e))
  448. exit(1)
  449. exit(0)
  450. # Lists for our ExpiredDomains results
  451. domain_list = []
  452. data = []
  453. # Generate list of URLs to query for expired/deleted domains
  454. urls = []
  455. if username == None or username == "":
  456. print('[-] Error: ExpiredDomains.net requires a username! Use the --username parameter')
  457. exit(1)
  458. if args.password == None or args.password == "":
  459. password = getpass.getpass("expireddomains.net Password: ")
  460. loginExpiredDomains()
  461. m = 200
  462. if maxresults < m:
  463. m = maxresults
  464. for i in range (0,(maxresults),m):
  465. k=""
  466. if keyword:
  467. k=keyword
  468. urls.append('{}/domains/combinedexpired/?fwhois=22&fadult=1&start={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&flimit={}&fdomain={}&fdomainstart={}&fdomainend={}&falexa={}'.format(expireddomainHost,i,m,k,keyword_start,keyword_end,alexa))
  469. max_reached = False
  470. for url in urls:
  471. print("[*] {}".format(url))
  472. domainrequest = s.get(url,headers=headers,verify=False,proxies=proxies)
  473. domains = domainrequest.text
  474. # Turn the HTML into a Beautiful Soup object
  475. soup = BeautifulSoup(domains, 'html.parser')
  476. try:
  477. table = soup.find_all("table", class_="base1")
  478. tbody = table[0].select("tbody tr")
  479. for row in tbody:
  480. # Alternative way to extract domain name
  481. # domain = row.find('td').find('a').text
  482. cells = row.findAll("td")
  483. if len(cells) == 1:
  484. max_reached = True
  485. break # exit if max rows reached
  486. if len(cells) >= 1:
  487. c0 = getIndex(cells, 0).lower() # domain
  488. c1 = getIndex(cells, 3) # bl
  489. c2 = getIndex(cells, 4) # domainpop
  490. c3 = getIndex(cells, 5) # birth
  491. c4 = getIndex(cells, 7) # Archive.org entries
  492. c5 = getIndex(cells, 8) # Alexa
  493. c6 = getIndex(cells, 10) # Dmoz.org
  494. c7 = getIndex(cells, 12) # status com
  495. c8 = getIndex(cells, 13) # status net
  496. c9 = getIndex(cells, 14) # status org
  497. c10 = getIndex(cells, 17) # status de
  498. c11 = getIndex(cells, 11) # TLDs
  499. c12 = getIndex(cells, 19) # RDT
  500. c13 = "" # List
  501. c14 = getIndex(cells, 22) # Status
  502. c15 = "" # links
  503. # create available TLD list
  504. available = ''
  505. if c7 == "available":
  506. available += ".com "
  507. if c8 == "available":
  508. available += ".net "
  509. if c9 == "available":
  510. available += ".org "
  511. if c10 == "available":
  512. available += ".de "
  513. # Only grab status for keyword searches since it doesn't exist otherwise
  514. status = ""
  515. if keyword:
  516. status = c14
  517. if keyword:
  518. # Only add Expired, not Pending, Backorder, etc
  519. # "expired" isn't returned any more, I changed it to "available"
  520. if c14 == "available": # I'm not sure about this, seems like "expired" isn't an option anymore. expireddomains.net might not support this any more.
  521. # Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
  522. if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
  523. domain_list.append([c0,c3,c4,available,status])
  524. # Non-keyword search table format is slightly different
  525. else:
  526. # Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
  527. if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
  528. domain_list.append([c0,c3,c4,available,status])
  529. if max_reached:
  530. print("[*] All records returned")
  531. break
  532. except Exception as e:
  533. print("[!] Error: ", e)
  534. pass
  535. # Add additional sleep on requests to ExpiredDomains.net to avoid errors
  536. time.sleep(5)
  537. # Check for valid list results before continuing
  538. if len(domain_list) == 0:
  539. print("[-] No domain results found or none are currently available for purchase!")
  540. exit(0)
  541. else:
  542. domain_list_unique = []
  543. [domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
  544. # Print number of domains to perform reputation checks against
  545. if check:
  546. print("\n[*] Performing reputation checks for {} domains".format(len(domain_list_unique)))
  547. print("")
  548. for domain_entry in domain_list_unique:
  549. domain = domain_entry[0]
  550. birthdate = domain_entry[1]
  551. archiveentries = domain_entry[2]
  552. availabletlds = domain_entry[3]
  553. status = domain_entry[4]
  554. bluecoat = '-'
  555. ibmxforce = '-'
  556. ciscotalos = '-'
  557. umbrella = '-'
  558. # Perform domain reputation checks
  559. if check:
  560. unwantedResults = ['Uncategorized','error','Not found.','Spam','Spam URLs','Pornography','badurl','Suspicious','Malicious Sources/Malnets','captcha','Phishing','Placeholders']
  561. bluecoat = checkBluecoat(domain)
  562. if bluecoat not in unwantedResults:
  563. print("[+] Bluecoat - {}: {}".format(domain, bluecoat))
  564. ibmxforce = checkIBMXForce(domain)
  565. if ibmxforce not in unwantedResults:
  566. print("[+] IBM XForce - {}: {}".format(domain, ibmxforce))
  567. ciscotalos = checkTalos(domain)
  568. if ciscotalos not in unwantedResults:
  569. print("[+] Cisco Talos {}: {}".format(domain, ciscotalos))
  570. if len(umbrella_apikey):
  571. umbrella = checkUmbrella(domain)
  572. if umbrella not in unwantedResults:
  573. print("[+] Umbrella {}: {}".format(domain, umbrella))
  574. mcafeewg = checkMcAfeeWG(domain)
  575. if mcafeewg not in unwantedResults:
  576. print("[+] McAfee Web Gateway (Cloud) {}: {}".format(domain, mcafeewg))
  577. print("")
  578. # Sleep to avoid captchas
  579. doSleep(timing)
  580. # Append entry to new list with reputation if at least one service reports reputation
  581. if not (\
  582. (bluecoat in ('Uncategorized','badurl','Suspicious','Malicious Sources/Malnets','captcha','Phishing','Placeholders','Spam','error')) \
  583. and (ibmxforce in ('Not found.','error')) \
  584. and (ciscotalos in ('Uncategorized','error')) \
  585. and (umbrella in ('Uncategorized','None')) \
  586. and (mcafeewg in ('Uncategorized','error'))):
  587. data.append([domain,birthdate,archiveentries,availabletlds,status,bluecoat,ibmxforce,ciscotalos,umbrella,mcafeewg])
  588. # Sort domain list by column 2 (Birth Year)
  589. sortedDomains = sorted(data, key=lambda x: x[1], reverse=True)
  590. if check:
  591. if len(sortedDomains) == 0:
  592. print("[-] No domains discovered with a desireable categorization!")
  593. exit(0)
  594. else:
  595. print("[*] {} of {} domains discovered with a potentially desireable categorization!".format(len(sortedDomains),len(domain_list)))
  596. # Build HTML Table
  597. html = ''
  598. htmlHeader = '<html><head><title>Expired Domain List</title></head>'
  599. htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
  600. htmlTableHeader = '''
  601. <table border="1" align="center">
  602. <th>Domain</th>
  603. <th>Birth</th>
  604. <th>Entries</th>
  605. <th>TLDs Available</th>
  606. <th>Status</th>
  607. <th>BlueCoat</th>
  608. <th>IBM X-Force</th>
  609. <th>Cisco Talos</th>
  610. <th>Umbrella</th>
  611. <th>WatchGuard</th>
  612. <th>Namecheap</th>
  613. <th>Archive.org</th>
  614. '''
  615. htmlTableBody = ''
  616. htmlTableFooter = '</table>'
  617. htmlFooter = '</body></html>'
  618. # Build HTML table contents
  619. for i in sortedDomains:
  620. htmlTableBody += '<tr>'
  621. htmlTableBody += '<td>{}</td>'.format(i[0]) # Domain
  622. htmlTableBody += '<td>{}</td>'.format(i[1]) # Birth
  623. htmlTableBody += '<td>{}</td>'.format(i[2]) # Entries
  624. htmlTableBody += '<td>{}</td>'.format(i[3]) # TLDs
  625. htmlTableBody += '<td>{}</td>'.format(i[4]) # Status
  626. htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/" target="_blank">{}</a></td>'.format(i[5]) # Bluecoat
  627. htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">{}</a></td>'.format(i[0],i[6]) # IBM x-Force Categorization
  628. htmlTableBody += '<td><a href="https://www.talosintelligence.com/reputation_center/lookup?search={}" target="_blank">{}</a></td>'.format(i[0],i[7]) # Cisco Talos
  629. htmlTableBody += '<td>{}</td>'.format(i[8]) # Cisco Umbrella
  630. htmlTableBody += '<td><a href="https://sitelookup.mcafee.com/en/feedback/url?action=checksingle&url=http%3A%2F%2F{}&product=14-ts" target="_blank">{}</a></td>'.format(i[0],i[9]) # McAfee Web Gateway (Cloud)
  631. htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0]) # Borderware WatchGuard
  632. htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0]) # Namecheap
  633. htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0]) # Archive.org
  634. htmlTableBody += '</tr>'
  635. html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter
  636. logfilename = "{}_domainreport.html".format(timestamp)
  637. if output != None:
  638. logfilename = output
  639. log = open(logfilename,'w')
  640. log.write(html)
  641. log.close
  642. print("\n[*] Search complete")
  643. print("[*] Log written to {}\n".format(logfilename))
  644. # Print Text Table
  645. header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BlueCoat', 'IBM', 'Cisco Talos', 'Umbrella']
  646. print(drawTable(header,sortedDomains))