util.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. '''
  2. /*#############################################################################
  3. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems(R).
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ############################################################################ */
  14. '''
  15. import argparse
  16. import platform
  17. import logging
  18. import os
  19. import subprocess
  20. import sys
  21. import traceback
  22. from ..common.error import Error
  23. from ..common.shell import Shell
  24. def isPositiveIntNum(string):
  25. for i in range(0, len(string)):
  26. if (string[i] < '0') or (string[i] > '9'):
  27. return False
  28. return True
  29. def checkPqParam(string):
  30. if isPositiveIntNum(string) or (string == '-1'):
  31. value = int(string)
  32. else:
  33. msg = "Wrong value of threadNumber parameter: '"+string+"' !"
  34. raise argparse.ArgumentTypeError(msg)
  35. return value
  36. def checkXParam(string):
  37. param=str(string)
  38. if len(param):
  39. if ('=' in param) or ('None' == param):
  40. value = param
  41. else:
  42. raise Error("5000", err="But got argument:'%s'" % (param) )
  43. else:
  44. msg = "Missing argument of -X parameter!"
  45. raise argparse.ArgumentTypeError(msg)
  46. return value
  47. def getVersionNumbers():
  48. version = platform.python_version_tuple()
  49. verNum = {'main':0, 'minor':0, 'patch':0}
  50. if isPositiveIntNum(version[0]):
  51. verNum['main'] = int(version[0])
  52. if isPositiveIntNum(version[1]):
  53. verNum['minor'] = int(version[1])
  54. if isPositiveIntNum(version[2]):
  55. verNum['patch'] = int(version[2])
  56. return(verNum);
  57. def parentPath(osPath):
  58. # remove current dir
  59. [osPath, sep, curDir] = osPath.rpartition(os.sep)
  60. return osPath
  61. def convertPath(osPath):
  62. hpccPath = ''
  63. osPath = osPath.lstrip(os.sep)
  64. osPath = osPath.replace(os.sep, '::')
  65. for i in range(0, len(osPath)):
  66. if osPath[i] >= 'A' and osPath[i] <= 'Z':
  67. hpccPath = hpccPath +'^'
  68. hpccPath = hpccPath +osPath[i]
  69. return hpccPath
  70. gConfig = None
  71. def setConfig(config):
  72. global gConfig
  73. gConfig = config
  74. def getConfig():
  75. return gConfig
  76. def getEclRunArgs(test, engine, cluster):
  77. retString=''
  78. test.setJobname("")
  79. retString += "ecl run -fpickBestEngine=false --target=%s --cluster=%s --port=%s " % (engine, cluster, gConfig.espSocket)
  80. retString += "--exception-level=warning --noroot --name=\"%s\" " % (test.getJobname())
  81. retString += "%s " % (" ".join(test.getFParameters()))
  82. retString += "%s " % (" ".join(test.getDParameters()))
  83. retString += "%s " % (" ".join(test.getStoredInputParameters()))
  84. args = []
  85. addCommonEclArgs(args)
  86. retString += "%s " % (" ".join(args))
  87. return retString
  88. def addCommonEclArgs(args):
  89. args.append('--server=' + gConfig.espIp)
  90. args.append('--username=' + gConfig.username)
  91. args.append('--password=' + gConfig.password)
  92. args.append('--port=' + gConfig.espSocket)
  93. if gConfig.useSsl.lower() == 'true':
  94. args.append('--ssl')
  95. def queryWuid(jobname, taskId):
  96. shell = Shell()
  97. cmd = 'ecl'
  98. defaults = []
  99. args = []
  100. args.append('status')
  101. args.append('-v')
  102. args.append('-n=' + jobname)
  103. addCommonEclArgs(args)
  104. res, stderr = shell.command(cmd, *defaults)(*args)
  105. logging.debug("%3d. queryWuid(%s, cmd :'%s') result is: '%s'", taskId, jobname, cmd, res)
  106. wuid = "Not found"
  107. state = 'N/A'
  108. result = 'Fail'
  109. if len(res):
  110. resultItems = res.split(',')
  111. if len(resultItems) == 3:
  112. result = 'OK'
  113. for resultItem in resultItems:
  114. resultItem = resultItem.strip()
  115. [key, val] = resultItem.split(':')
  116. if key == 'ID':
  117. wuid = val
  118. if key == 'state':
  119. state = val
  120. return {'wuid':wuid, 'state':state, 'result':result}
  121. def queryEngineProcess(engine):
  122. retVal = []
  123. myProc = subprocess.Popen(["ps aux | egrep '"+engine+"' | egrep -v 'grep'"], shell=True, bufsize=8192, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  124. result = myProc.stdout.read() + myProc.stderr.read()
  125. results = result.split('\n')
  126. for line in results:
  127. line = line.replace('\n','')
  128. if len(line):
  129. items = line.split()
  130. if len(items) >= 12:
  131. if engine in items[10]:
  132. myProc2 = subprocess.Popen(["sudo readlink -f /proc/" + items[1] + "/exe"], shell=True, bufsize=8192, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  133. result2 = myProc2.stdout.read().replace ('\n', '')
  134. binPath = os.path.dirname(result2)
  135. if 'slavenum' in line:
  136. ind = [items.index(i) for i in items if i.startswith('slavenum')]
  137. slaveNum = items[ind[0]].split('=')[1]
  138. retVal.append({ 'process' : result2, 'name' : os.path.basename(items[10]), 'slaveNum' : slaveNum, 'pid' : items[1], 'binPath': binPath})
  139. else:
  140. retVal.append({ 'process' : result2, 'name' : os.path.basename(items[10]), 'slaveNum' : '', 'pid' : items[1], 'binPath': binPath})
  141. return retVal
  142. def createStackTrace(wuid, proc, taskId):
  143. binPath = proc['process']
  144. pid = proc['pid']
  145. outFile = os.path.expanduser(gConfig.logDir) + '/' + wuid +'-' + proc['name'] + proc['slaveNum'] + '.trace'
  146. logging.error("%3d. Create Stack Trace for %s%s (pid:%s) into '%s'", taskId, proc['name'], proc['slaveNum'], pid, outFile)
  147. cmd = 'gdb --batch --quiet -ex "set interactive-mode off" '
  148. cmd += '-ex "echo \nBacktrace for all threads\n==========================" -ex "thread apply all bt" '
  149. cmd += '-ex "echo \nRegisters:\n==========================\n" -ex "info reg" '
  150. cmd += '-ex "echo \nDisassembler:\n==========================\n" -ex "disas" '
  151. cmd += '-ex "quit" ' + binPath + ' ' + pid + ' > ' + outFile + ' 2>&1'
  152. myProc = subprocess.Popen([ cmd ], shell=True, bufsize=8192, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  153. result = myProc.stdout.read() + myProc.stderr.read()
  154. logging.debug("%3d. Create Stack Trace result:'%s'", taskId, result)
  155. def abortWorkunit(wuid, taskId = -1, engine = None):
  156. wuid = wuid.strip()
  157. logging.debug("%3d. abortWorkunit(wuid:'%s', engine: '%s')", taskId, wuid, str(engine))
  158. logging.debug("%3d. config: generateStackTrace: '%s'", taskId, str(gConfig.generateStackTrace))
  159. if (gConfig.generateStackTrace and (engine != None)):
  160. if isSudoer():
  161. if engine.startswith('thor'):
  162. hpccProcesses = queryEngineProcess("thormaster")
  163. hpccProcesses += queryEngineProcess("thorslave")
  164. elif engine.startswith('hthor'):
  165. hpccProcesses = queryEngineProcess("eclagent")
  166. elif engine.startswith('roxie'):
  167. hpccProcesses = queryEngineProcess("roxie")
  168. for p in hpccProcesses:
  169. createStackTrace(wuid, p, taskId)
  170. pass
  171. else:
  172. err = Error("7100")
  173. logging.error("%s. clearOSCache error:%s" % (taskId, err))
  174. logging.error(traceback.format_exc())
  175. raise Error(err)
  176. pass
  177. shell = Shell()
  178. cmd = 'ecl'
  179. defaults=[]
  180. args = []
  181. args.append('abort')
  182. args.append('-wu=' + wuid)
  183. addCommonEclArgs(args)
  184. state=shell.command(cmd, *defaults)(*args)
  185. return state
  186. def createZAP(wuid, taskId, reason=''):
  187. retVal = 'Error in create ZAP'
  188. zapFilePath = os.path.join(os.path.expanduser(gConfig.regressionDir), gConfig.zapDir)
  189. shell = Shell()
  190. cmd = 'ecl'
  191. defaults=[]
  192. args = []
  193. args.append('zapgen')
  194. args.append(wuid)
  195. args.append('--path=' + zapFilePath)
  196. if reason != '':
  197. args.append('--description=' + reason)
  198. else:
  199. args.append('--description="Failed in OBT"')
  200. args.append('--inc-thor-slave-logs')
  201. addCommonEclArgs(args)
  202. try:
  203. state=shell.command(cmd, *defaults)(*args)
  204. logging.debug("%3d. createZAP(state:%s)", taskId, str(state))
  205. if state[1] != '':
  206. retVal = state[1]
  207. else:
  208. retVal = state[0]
  209. except Exception as ex:
  210. state = "Unable to query "+ str(ex)
  211. logging.debug("%3d. %s in createZAP(%s)", taskId, state, wuid)
  212. retVal += " (" + str(ex). replace('\n',' ') + ")"
  213. return retVal
  214. def getRealIPAddress():
  215. ipAddress = '127.0.0.1'
  216. found = False
  217. try:
  218. proc = subprocess.Popen(['ip', '-o', '-4', 'addr', 'show'], shell=False, bufsize=8192, stdout=subprocess.PIPE, stderr=None)
  219. result = proc.communicate()[0]
  220. results = result.split('\n')
  221. for line in results:
  222. if 'scope global' in line:
  223. items = line.split()
  224. ipAddress = items[3].split('/')[0]
  225. found = True
  226. break;
  227. if not found:
  228. for line in results:
  229. items = line.split()
  230. ipAddress = items[3].split('/')[0]
  231. break;
  232. except OSError:
  233. pass
  234. finally:
  235. pass
  236. return ipAddress
  237. def checkClusters(clusters, targetSet):
  238. targetEngines =[]
  239. if 'all' in clusters:
  240. for engine in gConfig.Engines:
  241. targetEngines.append(str(engine))
  242. else:
  243. for engine in clusters:
  244. engine = engine.strip()
  245. if engine in gConfig.Engines:
  246. targetEngines.append(engine)
  247. else:
  248. logging.error("%s. Unknown engine:'%s' in %s:'%s'!" % (1, engine, targetSet, clusters))
  249. raise Error("4000")
  250. return targetEngines
  251. def isLocalIP(ip):
  252. retVal=False
  253. if '127.0.0.1' == ip:
  254. retVal = True
  255. elif ip == getRealIPAddress():
  256. retVal = True
  257. return retVal
  258. def checkHpccStatus():
  259. # Check HPCC Systems status on local/remote target
  260. isLocal = False
  261. isIpChecked={}
  262. config = getConfig()
  263. ip = config.espIp
  264. isIpChecked[ip] = False
  265. isLocal = isLocalIP(ip)
  266. try:
  267. if isLocal:
  268. # There is no remote version (yet)
  269. myProc = subprocess.Popen(["ecl --version"], shell=True, bufsize=8192, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  270. result = myProc.stdout.read() + myProc.stderr.read()
  271. results = result.split('\n')
  272. for line in results:
  273. if 'not found' in line:
  274. err = Error("6000")
  275. logging.error("%s. %s:'%s'" % (1, err, line))
  276. raise err
  277. break
  278. else:
  279. # Maybe use SSH to run "ecl --version" on a remote node
  280. pass
  281. args = []
  282. addCommonEclArgs(args)
  283. myProc = subprocess.Popen("ecl getname --wuid 'W*' --limit=5 " + " ".join(args), shell=True, bufsize=8192, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  284. result = myProc.stdout.read() + myProc.stderr.read()
  285. results = result.split('\n')
  286. for line in results:
  287. if "Error connecting" in line:
  288. if isLocal:
  289. err = Error("6001")
  290. logging.error("%s. %s:'%s local target!'" % (1, err, line))
  291. raise (err)
  292. else:
  293. err = Error("6004")
  294. logging.error("%s. %s:'%s remote target!'" % (1, err, line))
  295. raise (err)
  296. break
  297. if "command not found" in line:
  298. err = Error("6002")
  299. logging.error("%s. %s:'%s'" % (1, err, line))
  300. raise (err)
  301. break
  302. isIpChecked[ip] = True
  303. except OSError:
  304. err = Error("6002")
  305. logging.error("%s. checkHpccStatus error:%s!" % (1, err))
  306. raise Error(err)
  307. except ValueError:
  308. err = Error("6003")
  309. logging.error("%s. checkHpccStatus error:%s!" % (1, err))
  310. raise Error(err)
  311. finally:
  312. pass
  313. def isSudoer(testId = -1):
  314. retVal = False
  315. if 'linux' in sys.platform :
  316. tryCount = 5
  317. cmd = "timeout -k 2 2 sudo id && echo Access granted || echo Access denied"
  318. while tryCount > 0:
  319. tryCount -= 1
  320. myProc = subprocess.Popen([cmd], shell=True, bufsize=8192, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  321. (myStdout, myStderr) = myProc.communicate()
  322. result = "returncode:" + str(myProc.returncode) + ", stdout:\n'" + myStdout + "', stderr:\n'" + myStderr + "'."
  323. logging.debug("%3d. isSudoer() result is: '%s' (try count is:%d)", testId, result, tryCount)
  324. if 'timeout: invalid option' in myStderr:
  325. logging.debug("%3d. isSudoer() result is: '%s'", testId, result)
  326. cmd = "timeout 2 sudo id && echo Access granted || echo Access denied"
  327. logging.debug("%3d. try is without '-k 2' parameter: '%s'", testId, cmd)
  328. continue
  329. if 'Access denied' not in myStdout:
  330. retVal = True
  331. break
  332. if retVal == False:
  333. logging.debug("%3d. isSudoer() result is: '%s'", testId, result)
  334. return retVal
  335. def clearOSCache(testId = -1):
  336. if 'linux' in sys.platform :
  337. if isSudoer(testId):
  338. myProc = subprocess.Popen(["free; sudo -S sync; echo 3 | sudo tee /proc/sys/vm/drop_caches; free"], shell=True, bufsize=8192, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  339. (myStdout, myStderr) = myProc.communicate()
  340. result = "returncode:" + str(myProc.returncode) + ", stdout:\n'" + myStdout + "', stderr:\n'" + myStderr + "'."
  341. logging.debug("%3d. clearOSCache() result is: '%s'", testId, result)
  342. else:
  343. err = Error("7000")
  344. logging.error("%s. clearOSCache error:%s" % (testId, err))
  345. logging.error(traceback.format_exc())
  346. raise Error(err)
  347. else:
  348. logging.debug("%3d. clearOSCache() not supported on %s.", testId, sys.platform)
  349. pass