hpcc_common.in 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068
  1. ## hpcc_common.lib
  2. ################################################################################
  3. # HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. ################################################################################
  17. #
  18. # A series of functions that are common to all hpcc-init processes
  19. #
  20. ## cfg.parser parses an ini format file and when given a section places
  21. ## the associated variables with the section in to local scope.
  22. ##
  23. ## $1="<path to ini file>"
  24. ####
  25. ###<REPLACE>###
  26. START_STOP_DAEMON=${INSTALL_DIR}/bin/start-stop-daemon
  27. which_service(){
  28. SERV=`type --path service`
  29. if [ "${SERV}" == "" ]; then
  30. if [ -e "/sbin/service" ]; then
  31. SERV=/sbin/service
  32. elif [ -e "/usr/sbin/service" ]; then
  33. SERV=/usr/sbin/service
  34. fi
  35. fi
  36. }
  37. which_pidof(){
  38. PIDOF=`type --path pidof`
  39. if [ "${PIDOF}" == "" ]; then
  40. if [ -e /bin/pidof ]; then
  41. PIDOF=/bin/pidof
  42. elif [ -e /sbin/pidof ]; then
  43. PIDOF=/sbin/pidof
  44. elif [ -e /usr/sbin/pidof ]; then
  45. PIDOF=/usr/sbin/pidof
  46. fi
  47. fi
  48. }
  49. source ${INSTALL_DIR}/etc/init.d/lock.sh
  50. source ${INSTALL_DIR}/etc/init.d/pid.sh
  51. source ${INSTALL_DIR}/etc/init.d/init-functions
  52. source ${INSTALL_DIR}/etc/init.d/export-path
  53. cfg.parser () {
  54. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Parsing $1 config file..."
  55. if [ ! -e $1 ] || [ $# -lt 1 ]; then
  56. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 1 && return 1 || return 1
  57. fi
  58. IFS=$'\n' && ini=( $(cat $1 | sed -e 's/[ \t]*=[ \t]*/=/') ) # convert to line-array
  59. ini=( ${ini[*]//[;#]*/} ) # remove comments
  60. ini=( ${ini[*]/#[/\}$'\n'cfg.section.} ) # set section prefix
  61. ini=( ${ini[*]/%]/ \(} ) # convert text2function (1)
  62. ini=( ${ini[*]/=/=\( } ) # convert item to array
  63. ini=( ${ini[*]/%/ \)} ) # close array parenthesis
  64. ini=( ${ini[*]/%\( \)/\(\) \{} ) # convert text2function (2)
  65. ini=( ${ini[*]/%\} \)/\}} ) # remove extra parenthesis
  66. ini[0]='' # remove first element
  67. ini[${#ini[*]} + 1]='}' # add the last brace
  68. #echo "${ini[*]}" # echoing the result
  69. eval "$(echo "${ini[*]}")" # eval the result
  70. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 0
  71. }
  72. ## dir.parser parses the return of configgen -listcommondirs and builds local scope
  73. ## arrays for each return from the generator. It also builds an array of which
  74. ## elements can be used in the local scope ($dirArray).
  75. ##
  76. ## $@="line1 . . lineN"
  77. ##
  78. dir.parser() {
  79. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Parsing configgen data..."
  80. if [ $# -lt 1 ]; then
  81. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 1 && return 1 || return 1
  82. fi
  83. IFS=$'\n' && cmp=( $@ )
  84. cmp=( ${cmp[*]/#/dir_} )
  85. cmp=( ${cmp[*]/=/= dirItem=} )
  86. cmp=( ${cmp[*]/=/=\(} )
  87. cmp=( ${cmp[*]/%/ \)} )
  88. eval "$(echo "${cmp[*]}")"
  89. for i in ${!dir_*}; do
  90. dirArray=( ${dirArray[@]} $i )
  91. done
  92. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 0
  93. }
  94. ## dir.getByName allows the user to pass the name of a directory from the <Directories/>
  95. ## tag they would like to receive and in return sets $dir_return in the local scope
  96. ## to the array that has been requested.
  97. ##
  98. ## $1="<directory name>"
  99. ##
  100. dir.getByName(){
  101. dir_return=""
  102. name=$1
  103. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Retrieving dir by name: $1..\
  104. ."
  105. for x in ${dirArray[@]}; do
  106. eval z=(\${${x}[@]})
  107. if [ $x = "dir_${name}" ]; then
  108. dir_return=`echo ${z[@]} | awk -F" " '{print $1}' | awk -F"=" '{print $2}'`
  109. fi
  110. done
  111. if [[ "${VERBOSE:-0}" -eq 1 ]]; then
  112. if [ ! -z "${dir_return}" ]; then
  113. log_end_msg 0
  114. else
  115. log_end_msg 1
  116. fi
  117. fi
  118. }
  119. ## comp.parser parses the return of configgen and builds local scope arrays for each
  120. ## return from the generator. It also builds an array of which elements can be used in
  121. ## the local scope ($compArray).
  122. ##
  123. ## $@="line1 . . lineN"
  124. ##
  125. comp.parser() {
  126. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Parsing configgen data..."
  127. if [ $# -lt 1 ]; then
  128. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 1 && return 1 || return 1
  129. fi
  130. IFS=$'\n' && cmp=( $@ )
  131. cmp=( ${cmp[*]/#/comp_} )
  132. cmp=( ${cmp[*]/=/= compType=} )
  133. cmp=( ${cmp[*]/;/ compPath=} )
  134. cmp=( ${cmp[*]/;/ compStat=} )
  135. cmp=( ${cmp[*]/=/=\(} )
  136. cmp=( ${cmp[*]/%/ \)} )
  137. eval "$(echo "${cmp[*]}")"
  138. for i in ${!comp_*}; do
  139. compArray=( ${compArray[@]} $i )
  140. done
  141. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 0
  142. }
  143. ## comp.getByType allows the user to pass the type of component for the local system they
## would like to receive and in return sets $comp_return in the local scope to the array
  145. ## that has been requested.
  146. ##
  147. ## $1="<component type>"
  148. ##
  149. comp.getByType() {
  150. comp_return=""
  151. type=$1
  152. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Retrieving components by type: $1..."
  153. for x in ${compArray[@]}; do
  154. eval z=(\${${x}[@]})
  155. eval $z
  156. if [ $compType = "${type}" ]; then
  157. comp_return=${z[@]}
  158. fi
  159. done
  160. if [[ "${VERBOSE:-0}" -eq 1 ]]; then
  161. if [ ! -z ${comp_return} ]; then
  162. echo "Returning 0"
  163. log_end_msg 0
  164. else
  165. echo "Returning 1"
  166. log_end_msg 1
  167. fi
  168. fi
  169. }
## comp.getByName allows the user to pass the name of a component for the local system they
## would like to receive and in return sets $comp_return in the local scope to the array
## that has been requested.
##
## $1="<component name>"
  175. ##
  176. comp.getByName() {
  177. comp_return=""
  178. name=$1
  179. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Retrieving component by name: $1..\
  180. ."
  181. for x in ${compArray[@]}; do
  182. eval z=(\${${x}[@]})
  183. eval $z
  184. if [ $x = "comp_${name}" ]; then
  185. comp_return=${z[@]}
  186. fi
  187. done
  188. if [[ "${VERBOSE:-0}" -eq 1 ]]; then
  189. if [ ! -z "${comp_return}" ]; then
  190. log_end_msg 0
  191. else
  192. log_end_msg 1
  193. fi
  194. fi
  195. }
  196. ## createDir allows the user to pass a directory to be created.
  197. ##
  198. ## $1="/dir/to/be/created/"
  199. ##
  200. createDir() {
  201. dir=$1
  202. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Creating ${dir} directory..."
  203. if [ ! -d $dir ]; then
  204. mkdir -p $dir
  205. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg $? && return $? || return $?
  206. fi
  207. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 1 && return 1 || return 1
  208. }
  209. ## removeDir allows the user to pass a directory to be removed.
  210. ##
  211. ## $1="/dir/to/be/removed/"
  212. ##
  213. removeDir() {
  214. dir=$1
  215. [[ "${VERBOSE:-0}" -eq 1 ]] && log_begin_msg "Removing ${dir} directory..."
  216. if [ -d $dir ]; then
  217. rm -rf $dir
  218. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg $? && return $? || return $?
  219. fi
  220. [[ "${VERBOSE:-0}" -eq 1 ]] && log_end_msg 1 && return 1 || return 1
  221. }
  222. # returns OK if $1 contains $2
  223. strstr() {
  224. [[ "${1#*$2*}" = "$1" ]] && return 1
  225. return 0
  226. }
  227. set_environmentvars() {
  228. HPCC_CONFIG=${HPCC_CONFIG:-${CONFIG_DIR}/${ENV_CONF_FILE}}
  229. ## Retrieve the Section to use from environment variable and if not set
  230. ## use default of "DEFAULT"
  231. ##
  232. SECTION=${SECTION:-DEFAULT}
  233. cfg.parser ${HPCC_CONFIG}
  234. cfg.section.${SECTION}
  235. if [ -n "${umask}" ]; then
  236. umask $umask
  237. fi
  238. }
  239. set_componentvars() {
  240. compName=$1
  241. comp.getByName ${compName}
  242. compPath=`echo $comp_return | cut -d ' ' -f 2 | cut -d '=' -f 2 `
  243. compType=`echo $comp_return | cut -d ' ' -f 1 | cut -d '=' -f 2 `
  244. PIDPATH=${pid}/init_${compName}.pid
  245. LOCKPATH=${lock}/$compName/$compName.lock
  246. COMPPIDPATH=${pid}/${compName}.pid
  247. }
  248. validate_configuration() {
  249. if ! validation_error=$(${configgen_path}/configgen -env ${envfile} -validateonly 2>&1); then
  250. log "get_commondirs(): validation failure ${envfile}"
  251. log "${validation_error}"
  252. echo -e "\033[31merror\033[0m: configgen xml validation failure"
  253. exit 1
  254. fi
  255. }
  256. get_commondirs() {
  257. componentFile="${path}/componentfiles/configxml"
  258. validate_configuration
  259. DIRS=$(${configgen_path}/configgen -env ${envfile} -id ${componentFile} -listcommondirs)
  260. rc=$?
  261. if [[ $rc -ne 0 ]]; then
  262. log "get_commondirs(): failure in configgen call"
  263. echo -e "\033[31merror\033[0m: get_commondirs() -> failure in configgen call"
  264. exit 1
  265. fi
  266. dir.parser ${DIRS}
  267. }
  268. configGenCmd() {
  269. componentFile="${path}/componentfiles/configxml"
  270. # Creating logfiles for component
  271. logDir=$log/${compName}
  272. validate_configuration
  273. configcmd="${configgen_path}/configgen -env ${envfile} -od ${runtime} -id ${componentFile} -c ${compName}"
  274. log "$configcmd"
  275. if [ "${USER}" != "${user}" ]; then
  276. su ${user} -c "$configcmd" 2>/dev/null
  277. else
  278. ${configcmd} 2>/dev/null
  279. fi
  280. rc=$?
  281. if [[ $rc -ne 0 ]]; then
  282. log "configGenCmd(): failure in configgen call"
  283. echo -e "\033[31merror\033[0m: configGenCmd() -> failure in configgen call"
  284. exit 1
  285. fi
  286. }
  287. createRuntime() {
  288. # Creating Directories for runtime environment
  289. if [ ! -d ${runtime} ]; then
  290. createDir ${runtime}
  291. fi
  292. if [ ! -d ${pid} ]; then
  293. createDir ${pid}
  294. fi
  295. if [ ! -d ${lock} ]; then
  296. createDir ${lock}
  297. fi
  298. if [ ! -d ${log} ]; then
  299. createDir ${log}
  300. fi
  301. chown -c $user:$group ${runtime} 1> /dev/null 2>/dev/null
  302. chown -c $user:$group ${pid} 1> /dev/null 2>/dev/null
  303. chown -c $user:$group ${lock} 1> /dev/null 2>/dev/null
  304. chown -c $user:$group ${log} 1> /dev/null 2>/dev/null
  305. [ -z "$compName" ] && return
  306. # Creating Component Specific directories
  307. # Creating pidfile specific directory and changing its owner permissions
  308. if [ ! -d "$pid/$compName" ]; then
  309. log "Creating Pidfile Directory $pid/$compName"
  310. createDir "$pid/$compName"
  311. fi
  312. if [ ! -d "$lock/$compName" ]; then
  313. log "Creating Lockfile Directory $lock/$compName"
  314. createDir "$lock/$compName"
  315. fi
  316. if [ ! -d "$log/$compName" ]; then
  317. log "Creating Log Directory $log/$compName"
  318. createDir "$log/$compName"
  319. fi
  320. # Creating runtime specific directory and changing its owner permissions
  321. if [ ! -d $compPath ]; then
  322. log "Creating Runtime Directory $compPath"
  323. createDir "$compPath"
  324. fi
  325. #change the permission for all component directory under var
  326. chown -c $user:$group "$pid/$compName" 1> /dev/null 2>/dev/null
  327. chown -c $user:$group "$lock/$compName" 1> /dev/null 2>/dev/null
  328. chown -c $user:$group "$log/$compName" 1> /dev/null 2>/dev/null
  329. chown -c $user:$group "$compPath" 1> /dev/null 2>/dev/null
  330. dir.getByName data
  331. chown -c $user:$group "${dir_return}" 1> /dev/null 2>/dev/null
  332. dir.getByName data2
  333. chown -c $user:$group "${dir_return}" 1> /dev/null 2>/dev/null
  334. dir.getByName data3
  335. chown -c $user:$group "${dir_return}" 1> /dev/null 2>/dev/null
  336. dir.getByName query
  337. chown -c $user:$group "${dir_return}" 1> /dev/null 2>/dev/null
  338. dir.getByName mirror
  339. chown -c $user:$group "${dir_return}" 1> /dev/null 2>/dev/null
  340. }
  341. # cleanup all standard files made during runtime
  342. cleanupRuntimeEnvironment() {
  343. unlock ${lock}/${compName}/${compName}.lock
  344. removePid ${pid}/init_${compName}.pid
  345. removePid ${pid}/${compName}.pid
  346. }
  347. start_dafilesrv() {
  348. /etc/init.d/dafilesrv status 1>/dev/null 2>/dev/null
  349. if [ $? -ne 0 ];then
  350. #Dafilesrv is not running so start it , before starting cleanup the lock and pid file.
  351. cleanupRuntimeEnvironment
  352. noStatusCheck=1
  353. /etc/init.d/dafilesrv setup 1>/dev/null 2>/dev/null
  354. startCmd ${compName} ${noStatusCheck}
  355. return $?
  356. else
  357. log "Component $compName already started ..."
  358. printf "Starting %-21s" "$compName ..."
  359. log_success_msg "Already started"
  360. return 0
  361. fi
  362. }
## startCmd launches the component named by the global $compName through
## start-stop-daemon and then polls check_status until it reports success.
##
## $1 - component name (not read; the body uses the global $compName)
## $2 - noStatusCheck: when 1 the pre-start status handling is skipped
##
## Returns 0 when the component is (or already was) started or is still
## waiting on its sentinel file, 1 when cleanup of a previous instance fails or
## the init process disappears while waiting, 3 when the lock file cannot be
## created, and otherwise the last check_status code once the wait times out.
startCmd() {
    noStatusCheck=$2
    printf "Starting %-21s" "$compName ..."
    log "compType = $compType"
    # use less heap when threaded
    export MALLOC_ARENA_MAX=8
    # Creating logfiles for component
    logDir=$log/${compName}
    if [[ ${noStatusCheck} -ne 1 ]]; then
        check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 1
        RCSTART=$?
        # Status codes handled below, as described by the log messages:
        # 4 = orphaned process or stale lock file, 2 = started but unhealthy,
        # 0 = already started. Any other code falls through to a normal start.
        if [[ ${RCSTART} -eq 4 ]];then
            checkPidExist $PIDPATH
            local initRunning=$__pidExists
            checkPidExist $COMPPIDPATH
            local compRunning=$__pidExists
            if [[ $compRunning -eq 1 || $initRunning -eq 1 ]]; then
                log "Orphaned Process"
                cleanup_component
                if [[ $? -eq 1 ]]; then
                    log "Attempt to clean up component has failed"
                    log_failure_msg
                    return 1
                fi
            fi
            # do cleanup on successful cleanup_component return
            # and if RCSTART -eq 4 due to lockfile still being present
            log "Attempt to clean up component was successful"
            cleanupRuntimeEnvironment
        elif [[ ${RCSTART} -eq 2 ]]; then
            log "The component $compName was previously started but is in an unhealthy state"
            log " Could possibly still be attempting to start. Use the force flag to attmpt a restart"
            # Only clean up and restart when the caller set FORCE=FORCE.
            if [[ ${FORCE:-NO_FORCE} == "FORCE" ]]; then
                cleanup_component
                if [[ $? -eq 1 ]]; then
                    log "Attempt to clean up unleathy state of $compName failed"
                    log_failure_msg
                    return 1
                else
                    cleanupRuntimeEnvironment
                fi
            else
                # component is already started but waiting to become healthy
                log_success_msg "Waiting on sentinel file creation"
                return 0
            fi
        elif [[ ${RCSTART} -eq 0 ]]; then
            #Since component is already started but current script is failed till returning 0
            log "$compName ---> already started"
            log_success_msg
            return ${RCSTART}
        fi
    fi
    # Minimum resource limits, one "MIN_H<flag>_<name>=<value>" entry each.
    # Characters 4-5 of an entry ("Hn", "Hc", ...) form the ulimit option used
    # to read the current hard limit; character 5 alone is the option used to
    # apply the value.
    limits=(
        MIN_Hn_nofile="32768"
        MIN_Hc_core="unlimited"
        MIN_Hu_nproc="8192"
        MIN_Hr_rtprio="4"
        MIN_Hl_memlock="unlimited" )
    local i=0
    for element in "${limits[@]}"; do
        flag="-${element:4:2}"
        value=${element##*"="}
        default_value=$( ulimit $flag )
        # Keep the current hard limit when it is already above the minimum:
        # the entry is rewritten with the current value instead.
        if [[ "$value" != "unlimited" ]] && ([[ "$default_value" == "unlimited" ]] || [[ "$default_value" -gt "$value" ]]); then
            _temp=${element%%"="*}
            _temp="${_temp}=${default_value}"
            limits[i]=$_temp
        fi
        flag="-${element:5:1}"
        ulimit $flag ${limits[i]##*"="}
        i=$((i+1))
    done
    # Pass the configured umask on to start-stop-daemon when one is set.
    UMASK_ARG=""
    if [ -n "${umask}" ]; then
        UMASK_ARG="--umask ${umask}"
    fi
    EXEC_COMMAND="${bin_path}/init_${compType} "
    startcmd="${START_STOP_DAEMON} -S -p ${pid}/init_${compName}.pid -c ${user}:${group} -d ${compPath} ${UMASK_ARG} -m -x ${EXEC_COMMAND} -b"
    log "${startcmd}"
    # Creating a Lock
    lockPath=${lock}/${compName}
    if [ ! -d $lockPath ]; then
        mkdir -p $lockPath >>/dev/null 2>&1
    fi
    chown -c $user:$group $lockPath >> /dev/null 2>&1
    lock ${lock}/${compName}/${compName}.lock
    # lock reports its outcome through the global $__lockCreated.
    if [ $__lockCreated -eq 0 ]; then
        log "Cannot create the lock file. File locked by subsystem"
        log_failure_msg "Cannot create the lock file, File locked by subsystem"
        return 3
    fi
    eval $startcmd
    # Poll once per second, at most WAITTIME times, until check_status
    # succeeds or the init process is gone.
    local WAITTIME=120
    local RCSTART=0
    local COMPONENT_HAS_STARTED=0
    local SENTINEL_CHECK=1
    # dafilesrv is polled with the fourth check_status argument set to 0.
    # NOTE(review): presumably that argument toggles the sentinel-file check -
    # confirm against check_status.
    if [ ${compType} = "dafilesrv" ]; then
        SENTINEL_CHECK=0
    fi
    while [[ ${WAITTIME} -gt 0 ]]; do
        WAITTIME=`expr ${WAITTIME} - 1`
        check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} ${SENTINEL_CHECK}
        RCSTART=$?
        if [[ ${RCSTART} -eq 0 ]]; then
            log_success_msg
            return 0;
        fi
        # Give up early when the init process no longer exists.
        checkPidExist $PIDPATH
        local initRunning=$__pidExists
        if [[ $initRunning -eq 0 ]]; then
            log "${compName} failed to start cleanly"
            log "Refer to the log file for the binary ${compName} for more information"
            log_failure_msg
            cleanupRuntimeEnvironment
            return 1;
        fi
        sleep 1
    done
    log_timeout_msg
    log "${compName} has timed out, but may still be starting"
    # NOTE(review): the reason for resetting the mode of ${envfile} on timeout
    # is not visible here - confirm it is intended.
    chmod 644 ${envfile}
    return ${RCSTART}
}
  487. stop_component() {
  488. printf "Stopping %-21s" "${compName}... "
  489. cd ${compPath}
  490. ####
  491. ## This is handling for when daemon is running as an orphan daemon. That is process is
  492. ## not running but associated pidfile and/or lockfiles do exist.
  493. ###
  494. FAILED=0
  495. check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 0
  496. RCSTOP=$?
  497. if [[ ${RCSTOP} -eq 1 ]];then
  498. log "Already stopped"
  499. log_success_msg
  500. return 0
  501. elif [[ ${RCSTOP} -eq 4 ]]; then
  502. log "Orphaned process found"
  503. cleanup_component
  504. local ccReturn=$?
  505. if [[ ${ccReturn} -eq 0 ]]; then
  506. cleanupRuntimeEnvironment
  507. log_success_msg
  508. return 0
  509. else
  510. log "Failed to clean up orphans for $compName"
  511. log_failure_msg
  512. return 1
  513. fi
  514. fi
  515. stopcmd="${START_STOP_DAEMON} -K -p ${PIDPATH} >> tmp.txt 2>&1"
  516. log "$stopcmd"
  517. eval $stopcmd
  518. RESULT=0
  519. local waittime=30
  520. [[ $compType = "dali" ]] && waittime=720
  521. while [[ $RESULT -ne 1 && $waittime -gt 0 ]]; do
  522. check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 0
  523. RESULT=$?
  524. ((waittime--))
  525. if ! ((waittime % 60)); then
  526. echo "still stopping ..."
  527. fi
  528. [[ $RESULT -ne 1 ]] && sleep 1
  529. done
  530. if [[ $RESULT -ne 1 ]]; then
  531. if [[ $waittime -eq 0 ]]; then
  532. log_failure_msg "Process may still be attempting to shut down cleanly"
  533. else
  534. log_failure_msg
  535. fi
  536. else
  537. log_success_msg
  538. fi
  539. cleanupRuntimeEnvironment
  540. RCSTOP=0
  541. return ${RCSTOP}
  542. }
  543. #--------------------------------------------------------------------------------
  544. # Component Specific Functions
  545. #--------------------------------------------------------------------------------
  546. start_component() {
  547. # Creating logdirs for component
  548. logDir=$log/${compName}
  549. if [ ! -d $logDir ]; then
  550. mkdir -p $logDir >> tmp.txt 2>&1
  551. chown -c $user:$group $logDir >> /dev/null 2>&1
  552. fi
  553. # Creating Runtime
  554. createRuntime $compName $compPath
  555. # starting Component
  556. configGenCmd $compName
  557. STAT=0;
  558. if [ ${runSetupOnly} -ne 1 ]
  559. then
  560. cd ${compPath} >>/dev/null 2>&1
  561. startCmd ${compName} 0
  562. STAT=$?
  563. fi
  564. return $STAT;
  565. }
  566. restart_component() {
  567. if strstr "${compType}" "dafilesrv" ;then
  568. /etc/init.d/dafilesrv status 1>/dev/null 2>/dev/null
  569. if [ $? -eq 0 ];then
  570. /etc/init.d/dafilesrv stop 2>/dev/null
  571. else
  572. log "$compName ---> Stopped. Now Starting ..."
  573. echo "Component $compName was not running. Will start it now for you ..."
  574. cleanupRuntimeEnvironment
  575. fi
  576. /etc/init.d/dafilesrv start 2>/dev/null
  577. else
  578. check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 1
  579. RCRESTART=$?
  580. if [ $RCRESTART -ne 0 ];then
  581. log "$compName ---> Stopped. Now Starting ..."
  582. echo "Component $compName was not running. Will start it now for you ..."
  583. cleanupRuntimeEnvironment
  584. else
  585. stop_component ${compName}
  586. fi
  587. start_component $compName
  588. check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 1
  589. RCRESTART=$?
  590. return $RCRESTART
  591. fi
  592. }
  593. status_component() {
  594. check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 1
  595. RCSTATUS=$?
  596. getPid ${COMPPIDPATH}
  597. if [[ ${RCSTATUS} -ne 0 ]]; then
  598. log "${compName} ---> Stopped"
  599. printf "%-15s is stopped" "$compName"
  600. elif [[ ${compType} == "thor" ]]; then
  601. if [[ -e ${runtime}/${compName}/slaves && -e ${runtime}/${compName}/setvars ]]; then
  602. source ${runtime}/${compName}/setvars
  603. __slaves=$(cat ${runtime}/${compName}/slaves | wc -l)
  604. __slaveprocesses=$((${__slaves} * ${slavespernode}))
  605. log "${compName} ---> Running ( pid ${__pidValue} ) with {__slaveprocesses} slave process(es)"
  606. printf "%-15s ( pid %8s ) is running with %s slave process(es) ..." "${compName}" "${__pidValue}" "${__slaveprocesses}"
  607. else
  608. log "${compName} missing file in ${runtime}/${compName} necessary for status_component"
  609. printf "${compName} missing file in ${runtime}/${compName} necessary for status_component"
  610. fi
  611. else
  612. log "${compName} ---> Running ( pid ${__pidValue} )"
  613. printf "%-15s ( pid %8s ) is running ..." "${compName}" "${__pidValue}"
  614. fi
  615. echo ""
  616. return ${RCSTATUS}
  617. }
  618. setup_component() {
  619. printf "Setting %-22s" "$compName.... "
  620. runSetupOnly=1
  621. start_component ${compName}
  622. runSetupOnly=0
  623. if [ $? -eq 0 ];
  624. then
  625. log_success_msg ""
  626. return 0
  627. else
  628. log_failure_msg ""
  629. return 3
  630. fi
  631. }
  632. create_dropzone() {
  633. validate_configuration
  634. dropzones=$(${configgen_path}/configgen -env ${envfile} -listdirs)
  635. rc=$?
  636. if [[ $rc -ne 0 ]]; then
  637. log "create_dropzone(): failure in configgen call"
  638. echo -e "\033[31merror\033[0m: create_dropzone() -> failure in configgen call"
  639. exit 1
  640. fi
  641. for D in ${dropzones} ; do
  642. # Creating DropZone directory
  643. if [ ! -d ${D} ]; then
  644. mkdir -p $D > /dev/null 2>&1
  645. chown -c $user:$group $D > /dev/null 2>&1
  646. chmod 777 $D > /dev/null 2>&1
  647. fi
  648. done
  649. }
  650. check_user(){
  651. USER=$1
  652. id ${USER} > /dev/null
  653. if [ $? -eq 0 ];
  654. then
  655. return 1
  656. else
  657. return 0
  658. fi
  659. }
  660. check_group(){
  661. GROUP=$1
  662. touch /tmp/file.$$
  663. chgrp ${GROUP} /tmp/file.$$ > /dev/null 2>&1
  664. if [ $(stat -c %G /tmp/file.$$) = "${GROUP}" ];
  665. then
  666. rm -rf /tmp/file.$$
  667. return 1
  668. else
  669. rm -rf /tmp/file.$$
  670. return 0
  671. fi
  672. }
  673. add_user(){
  674. USER=$1
  675. GROUP=$2
  676. HOMEPATH=$3
  677. check_user ${USER}
  678. UFND=$?
  679. check_group ${GROUP}
  680. GFND=$?
  681. if [ ${GFND} -eq 1 ];
  682. then
  683. if [ ${UFND} -eq 1 ];
  684. then
  685. printf "Adding %s to group %s ..." "${USER}" "${GROUP}"
  686. CURRGROUPS=`id -nG ${USER} | tr ' ' ','`
  687. usermod -G ${CURRGROUPS},${GROUP} -c "${USER} Runtime User" ${USER}
  688. if [ $? -eq 0 ];
  689. then
  690. log_success_msg
  691. else
  692. log_failure_msg "Failed to add ${USER} to group ${GROUP}."
  693. return 1
  694. fi
  695. else
  696. printf "Adding %s to system ..." "${USER}"
  697. useradd -s ${SHELL} -r -m -d ${HOMEPATH} -g ${GROUP} -c "${USER} Runtime User" ${USER}
  698. passwd -l ${USER} 1>/dev/null 2>&1
  699. if [ $? -eq 0 ];
  700. then
  701. log_success_msg
  702. else
  703. log_failure_msg "Failed to add ${USER} to system."
  704. return 1
  705. fi
  706. fi
  707. else
  708. printf "Creating group %s ..." "${GROUP}"
  709. groupadd ${GROUP}
  710. if [ $? -eq 0 ];
  711. then
  712. log_success_msg
  713. else
  714. log_failure_msg "Failed to add group ${GROUP} to system."
  715. return 1
  716. fi
  717. if [ ${UFND} -eq 1 ];
  718. then
  719. printf "Adding %s to group %s ..." "${USER}" "${GROUP}"
  720. CURRGROUPS=`id -nG ${USER} | tr ' ' ','`
  721. usermod -G ${CURRGROUPS},${GROUP} -c "${USER} Runtime User" ${USER}
  722. if [ $? -eq 0 ];
  723. then
  724. log_success_msg
  725. else
  726. log_failure_msg "Failed to add ${USER} to group ${GROUP}."
  727. return 1
  728. fi
  729. else
  730. printf "Adding %s to system ..." "${USER}"
  731. useradd -s ${SHELL} -r -m -d ${HOMEPATH} -g ${GROUP} -c "${USER} Runtime User" ${USER}
  732. passwd -l ${USER} 1>/dev/null 2>&1
  733. if [ $? -eq 0 ];
  734. then
  735. log_success_msg
  736. else
  737. log_failure_msg "Failed to add ${USER} to system."
  738. return 1
  739. fi
  740. fi
  741. fi
  742. return 0
  743. }
  744. is_root(){
  745. if [ "$(id -u)" != "0" ]; then
  746. echo "This operation can be executed only by ROOT user"
  747. exit
  748. fi
  749. }
  750. is_user(){
  751. USER=$1
  752. if [ "$(id -u)" != "$(id -u ${USER})" ]; then
  753. echo "This operation should be run as ${USER}"
  754. exit
  755. fi
  756. }
  757. ##
## Usage: is_python_installed <version>
  759. ## <version> (optional): format: <major>.<minor>
  760. ## when specified return 0 if python version is equal or higher
  761. ## than <version>. Otherwise return 1
  762. ##
  763. is_python_installed () {
  764. _expected_version=$1
  765. which python > /dev/null 2>&1
  766. [ $? -ne 0 ] && return 1
  767. if [ -n "$_expected_version" ]
  768. then
  769. _actual_version=$(python -V 2>&1 | cut -d' ' -f 2 | cut -d '.' -f -2)
  770. echo $_actual_version | grep -q -e "^[0-9]\.[0-9]$"
  771. [ $? -ne 0 ] && return 1
  772. if [[ "$_actual_version" < "$_expected_version" ]]
  773. then
  774. return 1
  775. fi
  776. fi
  777. return 0
  778. }
  ##
  ## Usage: run_cluster <script> <flag> <ip list file>
  ##   script                  : command to run once per host; it is eval'ed with
  ##                             the host ip appended as its last argument
  ##   flag (optional)         : 1: exclude the local host
  ##                             0: run the script on all hosts in the cluster
  ##                                (default)
  ##   ip list file (optional) : when specified, the ip list is read from this
  ##                             file instead of being generated from
  ##                             environment.xml through configgen
  ##
  ## Globals read   : INSTALL_DIR, CONFIG_DIR, ENV_XML_FILE, LOG_DIR,
  ##                  RUN_CLUSTER_DISPLAY_OUTPUT ("TRUE" echoes the command
  ##                  output to the terminal instead of a progress line)
  ## Globals written: IPS, _cmd, _exclude_local, _log_file, _num_hosts,
  ##                  _num_passed, _num_failed, _num_processed, _progress
  ## Output of every run is appended to
  ##   ${LOG_DIR}/cluster/se_<command name>_<timestamp>.log
  ## Exits the shell with status 1 when the configgen call fails.
  ##
  run_cluster() {
    _cmd=$1
    _exclude_local=${2:-0}

    local t_IP rc IPS0 local_IPS ip lip _found _status _cmd_name
    local _nl=$'\n'

    if [[ -z "$3" ]]; then
      validate_configuration
      t_IP=$("${INSTALL_DIR}/sbin/configgen" -env "${CONFIG_DIR}/${ENV_XML_FILE}" -machines)
      rc=$?
      if [[ $rc -ne 0 ]]; then
        log "run_cluster(): failure in configgen call"
        echo -e "\033[31merror\033[0m: run_cluster() -> failure in configgen call"
        exit 1
      fi
      # configgen output is split on spaces; the first comma-separated field of
      # each record is kept and duplicates are removed.
      IPS0=$(echo $t_IP | awk 'BEGIN {FS=",";RS=" ";} {print $1;}' | sort -u)
    else
      IPS0=$(cat "$3")
    fi

    # Start from an empty list so hosts left in IPS by an earlier call (or by
    # the environment) are never processed again.
    IPS=
    # Exclude local ip if requested
    if [ "$_exclude_local" -eq 1 ]; then
      local_IPS=$(/sbin/ifconfig -a | grep "[[:space:]]*inet[[:space:]]" |
        sed 's/^[[:space:]]\+//g' | cut -d ' ' -f 2 | cut -d':' -f 2)
      for ip in $IPS0; do
        _found=0
        for lip in $local_IPS; do
          if [ "$ip" = "$lip" ]; then
            _found=1
            break
          fi
        done
        [ $_found -eq 0 ] && IPS="${IPS:+${IPS}${_nl}}${ip}"
      done
    else
      IPS=$IPS0
    fi

    _num_hosts=$(echo $IPS | wc -w)
    echo "Total $_num_hosts to process"

    _num_passed=0
    _num_failed=0
    _num_processed=0
    _progress=0

    # Name the log after the command itself (first word), not its arguments.
    _cmd_name=$(basename "${_cmd%%[[:space:]]*}")
    _log_file=${LOG_DIR}/cluster/se_${_cmd_name}_$(date +%Y%m%d_%H%M%S).log
    [ -d "${LOG_DIR}/cluster" ] || mkdir -p "${LOG_DIR}/cluster"
    [ -e "$_log_file" ] && rm -rf -- "$_log_file"

    if [ "$RUN_CLUSTER_DISPLAY_OUTPUT" != "TRUE" ]; then
      echo ""
      echo -ne "Execution progress: ${_progress}%, succeed: $_num_passed, failed: $_num_failed \r"
    fi

    for ip in $IPS; do
      if [ "$RUN_CLUSTER_DISPLAY_OUTPUT" = "TRUE" ]; then
        echo
        # Merge stderr before the pipe so errors are shown and logged too.
        eval "$_cmd" "$ip" 2>&1 | tee -a "$_log_file"
        _status=${PIPESTATUS[0]}
      else
        eval "$_cmd" "$ip" >> "$_log_file" 2>&1
        _status=$?
      fi

      if [ "$_status" -eq 0 ]; then
        _num_passed=$((_num_passed + 1))
      else
        _num_failed=$((_num_failed + 1))
      fi
      _num_processed=$((_num_processed + 1))

      if [ "$RUN_CLUSTER_DISPLAY_OUTPUT" != "TRUE" ]; then
        _progress=$((_num_processed * 100 / _num_hosts))
        echo -ne "Execution progress: ${_progress}%, succeed: $_num_passed, failed: $_num_failed \r"
      fi
    done

    echo ""
    if [ "$_num_passed" -ne "$_num_processed" ]; then
      echo "There are errors when executing ${_cmd_name}"
      echo "Check log $_log_file for details."
    else
      echo "${_cmd_name} run successfully on all hosts in the the cluster."
    fi
    echo ""
  }
  ##
  ## Usage: cluster_tools_init
  ## Initialization for cluster tools.
  ##
  ## Loads the environment through set_environmentvars, makes sure
  ## ${LOG_DIR}/cluster exists and hands it over to ${user} when it is owned
  ## by somebody else.
  ##
  ## Globals read   : LOG_DIR, user
  ## Globals written: _cmd_prefix ("sudo" when not running as root, else empty),
  ##                  CLUSTER_LOG_DIR, cluster_log_dir_owner
  ## Returns        : 0 when the directory already has the right owner,
  ##                  otherwise the status of the chown call
  ##
  cluster_tools_init() {
    set_environmentvars

    _cmd_prefix=
    [ "$(id -u)" != "0" ] && _cmd_prefix=sudo

    # Check and set log directory
    CLUSTER_LOG_DIR=${LOG_DIR}/cluster
    if [ ! -d "$LOG_DIR" ]; then
      # NOTE(review): presumably hpcc-init creates LOG_DIR as a side effect of
      # the status call; its output is intentionally discarded.
      ${_cmd_prefix} /etc/init.d/hpcc-init status > /dev/null 2>&1
    fi
    [ -d "$CLUSTER_LOG_DIR" ] || mkdir -p "$CLUSTER_LOG_DIR"

    # workaround inconsistency of stat command
    cluster_log_dir_owner=$(ls -ld "$CLUSTER_LOG_DIR" | awk '{print $3}')
    if [ "$cluster_log_dir_owner" != "${user}" ]; then
      ${_cmd_prefix} chown "${user}:${user}" "$CLUSTER_LOG_DIR"
      return $?
    fi

    # Nothing to change: report success instead of the failed ownership test.
    return 0
  }
  ##
  ## Cleanup component
  ##
  ## Sends SIGTERM to the process group of the component, waits for the group
  ## to disappear and, when FORCE is "FORCE", follows up with SIGKILL.
  ##
  ## Globals read   : home, user, runtime, compName, compType, COMPPIDPATH,
  ##                  PIDPATH, FORCE, __pidValue (set by getPid)
  ## Globals written: HOME, instancedir (plus whatever setvars defines)
  ## Returns        : 0 when no process (group) is left or none was found,
  ##                  1 when the process group is still running
  ##
  cleanup_component() {
    # used to get variables for frunssh
    # Necessary for when we source in setvars: since we aren't using the
    # start-stop-daemon, $HOME will be set to /root or some other location and
    # not the appropriate directory, causing problems with $SSHidentityfile
    set_environmentvars
    HOME=${home}/${user}
    instancedir=${runtime}/${compName}
    if [ -e "$instancedir/setvars" ]; then
      source "$instancedir/setvars"
    fi

    # Declared up front so the final return never picks up a stale global.
    local RUNNING=0

    # grab the PID of our component, and in the case it doesn't exist, the pid
    # of the init file (in case it somehow is still alive)
    local cpidpath=${COMPPIDPATH}
    if [ "${compType}" = "thor" ] && [ ! -f "${cpidpath}" ]; then
      # if run_thor is sent a SIGKILL, the normal COMPPIDPATH file will no
      # longer exist; fall back to the {compName}_master.pid file that will
      # still be available.
      cpidpath=${cpidpath/.pid/_master.pid}
    fi

    getPid "${cpidpath}"
    local mpid=${__pidValue:-0}
    if [ "$mpid" -eq 0 ]; then
      getPid "${PIDPATH}"
      mpid=${__pidValue:-0}
    fi

    # start with SIGTERM and then follow up with SIGKILL if unsuccessful and
    # force flag is set
    if [ "$mpid" -ne 0 ]; then
      # use the mpid we collected to grab the group pid of the process, to kill
      # off all siblings at once
      local pgid
      pgid=$(ps -o pgid= -p "$mpid" | tr -d '[:space:]')
      if [[ -z "$pgid" ]]; then
        # The process vanished before its group could be looked up.
        log "${compName} orphans cleaned up"
        return 0
      fi

      RUNNING=1
      kill -TERM -- "-${pgid}" > /dev/null 2>&1
      sleep 1

      local WAITTIME=60
      while [[ ${WAITTIME} -gt 0 ]]; do
        WAITTIME=$((WAITTIME - 1))
        # Signal 0 only probes whether the group still has members.
        if ! kill -0 -- "-${pgid}" &> /dev/null; then
          log "${compName} orphans cleaned up"
          RUNNING=0
          break
        fi
        if (( WAITTIME % 5 == 0 )); then
          log "Waiting for ${compName} orphans to cleanup gracefully"
        fi
        sleep 1
      done

      # if still running and Force option is set, send sigkill
      if [[ ${RUNNING} -eq 1 && ${FORCE:-NO_FORCE} == "FORCE" ]]; then
        log "WARNING Force flag is set"
        log "WARNING sending SIGKILL to orphans in pid group for ${compName}"
        log "INFO If sockets used by the process are still in a TIME_WAIT state"
        log "INFO due to unclean shutdown, the operating system possibly will"
        log "INFO not release them until 60 seconds after SIGKILL was sent"
        kill -KILL -- "-${pgid}" > /dev/null 2>&1 && RUNNING=0
      elif [[ ${RUNNING} -eq 1 && ${FORCE:-NO_FORCE} == "NO_FORCE" ]]; then
        log "INFO Unable to kill with SIGTERM. Use --force|-f to attempt SIGKILL"
      fi
    fi

    return "$RUNNING"
  }