hpcc-run.sh.in 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. #!/bin/bash
  2. ################################################################################
  3. # HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. ################################################################################
  17. #
  18. # Usage: hpcc-run.sh [hpcc-init | dafilesrv] [-c component] <cmd>
  19. #
  20. # This is accomplished with a standard ssh command with the use of the
  21. # runtime users id_rsa file.
  22. ###<REPLACE>###
  23. source ${INSTALL_DIR}/etc/init.d/hpcc_common
  24. source ${INSTALL_DIR}/etc/init.d/init-functions
  25. source ${INSTALL_DIR}/etc/init.d/export-path
  # Print the command-line help for this script and terminate.
  # The usage line and the -c/-a descriptions depend on whether use_systemd
  # succeeds; the remaining option descriptions are common to both cases.
  # Never returns normally: finishes by calling `end 1`.
  print_usage(){
    local -a _lines=("")

    if use_systemd; then
      _lines+=(
        "usage: hpcc-run.sh [-c component] [-n concurrent] [-s] [-S] {start|stop|restart|status}"
        " -c|--comp: HPCC component. For example, dali@mydali.service, roxie@myroxie.service, etc."
      )
    else # sysv init
      _lines+=(
        "usage: hpcc-run.sh [-c component] [-a {hpcc-init|dafilesrv}] [-n concurrent] [-s] [-S] {start|stop|stopall|restart|status|setup}"
        " -a|--action: HPCC service name. Either hpcc-init (default) or dafilesrv."
        " -c|--comp: HPCC component. For example, mydali, myroxie, mythor, etc."
      )
    fi

    _lines+=(
      " -n|--concurrent: How many concurrent instances to run. The default is equal to the number of nodes present."
      " -S|--sequentially: For the command to run sequentially. i.e. one host a time. (overrides -n)"
      " -s|--save: Save the result to a file named by ip."
      " -f|--force: Force kill orphaned process groups"
      " -d|--debug: set debug flag for hpcc-init on each node in the cluster"
      ""
    )

    printf '%s\n' "${_lines[@]}"
    end 1
  }
  # Fill the global IPS with the sorted, de-duplicated host addresses to act on.
  #   - comp empty: first comma-separated field of `configgen -machines`.
  #   - comp set:   third field of every `configgen -listall` line that contains
  #                 the component name (substring match). Under systemd the
  #                 leading "<type>@" and trailing ".service" are stripped from
  #                 comp first. The name used is left in the global tmp_comp.
  # If no address is found for the component, logs a failure, prints the usage
  # and terminates through end().
  getIPS(){
    local _configgen=${INSTALL_DIR}/sbin/configgen

    if [[ -n "${comp}" ]]; then
      tmp_comp=${comp}
      if use_systemd; then
        # e.g. dali@mydali.service -> mydali
        tmp_comp=${tmp_comp##*@}
        tmp_comp=${tmp_comp%%.service}
      fi

      IPS=$(${_configgen} -env ${envfile} -listall \
        | grep -e "${tmp_comp}" \
        | awk -F, '{ print $3 }' \
        | sort \
        | uniq)

      if [[ -z "${IPS}" ]]; then
        log_failure_msg "Component ${tmp_comp} not found"
        print_usage
        end 1
      fi
    else
      IPS=$(${_configgen} -env ${envfile} -machines \
        | awk -F, '{print $1}' \
        | sort \
        | uniq)
    fi
  }
  # Fill the global DIP with the sorted, de-duplicated third comma-separated
  # field of every `configgen -listall` line that contains "Dali".
  getDali(){
    DIP=$(${INSTALL_DIR}/sbin/configgen -env ${envfile} -listall \
      | awk -F, '/Dali/ {print $3}' \
      | sort \
      | uniq)
  }
  # Write the contents of the global IPS, followed by a newline, to the file
  # named by $1 (the file is overwritten).
  createIPListFile(){
    local _listFile=$1
    printf '%s\n' "$IPS" > "$_listFile"
  }
  # Copy the host list in file $1 to file $2, leaving out every line that
  # contains one of the addresses in the global DIP as a whole word.
  # DIP may hold several addresses, one per line; each is treated as a fixed
  # string. When DIP is empty there is nothing to exclude, so the input is
  # copied unchanged (an empty pattern must never reach grep, where its
  # meaning with -w/-v is not what is wanted here).
  # Errors are deliberately discarded; callers only test the output file.
  createIPListFileExcludeDIP(){
    local _input=$1
    local _output=$2

    if [[ -z "$DIP" ]]; then
      cat -- "$_input" > "$_output" 2> /dev/null
      return
    fi

    grep -vwF -- "$DIP" "$_input" > "$_output" 2> /dev/null
  }
  # Run one service command on a single host over ssh and collect its output.
  # Arguments:
  #   $1     - host address
  #   $2     - init script name (used only in the non-systemd case)
  #   $3...  - arguments passed to the init script
  # Globals read: home, user, comp, arg, INIT_PATH, reportDir
  # Returns: 1 if the host cannot be pinged or ssh reports 255 on the probe
  #          connection; otherwise the exit status of the ssh call that ran the
  #          command.
  # NOTE(review): the remote command line is "<cmd> | tee <file>", so the status
  # ssh reports is presumably that of the remote pipeline (i.e. tee) rather than
  # of <cmd> itself - confirm before relying on it.
  doOneIP(){
    local _ip=$1
    local _action=$2
    shift 2
    local _args
    _args=$(echo "$@" | tr '\n' ' ')
    local _key=$home/$user/.ssh/id_rsa

    if ! ping -c 1 -w 5 -n "$_ip" > /dev/null 2>&1; then
      echo "$_ip: Cannot Ping host? (Host Alive?)"
      return 1
    fi

    # Probe connection: 255 is the status checked for an ssh failure.
    ssh -i "$_key" -o BatchMode=yes -o LogLevel=QUIET -o StrictHostKeyChecking=no \
      "$user@$_ip" exit > /dev/null 2>&1
    if [[ $? -eq 255 ]]; then
      echo "$_ip: Cannot SSH to host."
      return 1
    fi

    local hpccStatusFile=/tmp/hpcc_status_$$
    local CMD
    if use_systemd; then
      if [[ -z "${comp}" ]]; then
        CMD="sudo systemctl ${arg} hpccsystems-platform.target"
      else
        CMD="sudo systemctl ${arg} ${comp}"
      fi
    else
      CMD="sudo ${INIT_PATH}/$_action $_args"
    fi

    echo "$_ip: Running $CMD"
    # The "| tee" is part of the string sent to the remote shell, not a local pipe.
    CMD="$CMD | tee ${hpccStatusFile}"
    ssh -i "$_key" -o LogLevel=QUIET "$user@$_ip" "$CMD"
    # Capture the status now, before the copy and cleanup commands overwrite it.
    local rc=$?

    scp -i "$_key" "$user@${_ip}:${hpccStatusFile}" "${reportDir}/$_ip" > /dev/null 2>&1
    ssh -i "$_key" -o LogLevel=QUIET "$user@$_ip" "rm -rf $hpccStatusFile"
    echo
    return $rc
  }
  # Generate an executable per-host runner script.
  # Arguments:
  #   $1     - path of the script file to create
  #   $2     - init script name (used only in the non-systemd case)
  #   $3...  - arguments passed to the init script
  # Globals read (expanded now, while the script is generated): home, user,
  #   comp, arg, INIT_PATH, reportDir, dateTime
  # The generated script takes a host address as its first argument, checks
  # ping and ssh reachability, runs the service command over ssh with its
  # output tee'd into a status file on the host, copies that file to
  # ${reportDir}/<host>, removes it on the host, and exits with the status
  # recorded right after the ssh call that ran the command.
  # Escaped items (\$, \`) are evaluated when the generated script runs;
  # unescaped ones are fixed at generation time.
  createScript(){
    local _scriptFile=$1
    local _action=$2
    shift 2
    local _args
    _args=$(echo "$@" | tr '\n' ' ')
    local hpccStatusFile=/tmp/hpcc_status_${dateTime}_$$

    cat > "$_scriptFile" <<SCRIPTFILE
#!/bin/bash
IP=\$1
if ping -c 1 -w 5 -n \$IP > /dev/null 2>&1; then
  echo "\$IP: Host is alive."
  CAN_SSH="\`ssh -i $home/$user/.ssh/id_rsa -o BatchMode=yes -o LogLevel=QUIET -o StrictHostKeyChecking=no $user@\$IP exit > /dev/null 2>&1; echo \$?\`"
  if [[ "\$CAN_SSH" -eq 255 ]]; then
    echo "\$IP: Cannot SSH to host."
    exit 1
  else
    # Silence both streams of the lookup; only its status matters.
    if command -v systemd > /dev/null 2>&1; then
      if [[ -z "${comp}" ]]; then
        if [[ "${arg}" == "status" ]]; then
          CMD="sudo /bin/systemctl list-dependencies hpccsystems-platform.target"
        else
          CMD="sudo /bin/systemctl ${arg} hpccsystems-platform.target"
        fi
      else
        if [[ "${arg}" == "status" ]]; then
          CMD="sudo /bin/systemctl list-dependencies ${comp}"
        else
          CMD="sudo /bin/systemctl ${arg} ${comp}"
        fi
      fi
    else
      CMD="sudo ${INIT_PATH}/$_action $_args"
    fi
    echo "\$IP: Running \$CMD"
    CMD="\$CMD | tee $hpccStatusFile"
    ssh -i $home/$user/.ssh/id_rsa -o LogLevel=QUIET $user@\$IP \$CMD;
    rc=\${PIPESTATUS[0]}
    scp -i $home/$user/.ssh/id_rsa $user@\${IP}:${hpccStatusFile} ${reportDir}/\$IP
    CMD="rm -rf $hpccStatusFile"
    ssh -i $home/$user/.ssh/id_rsa -o LogLevel=QUIET $user@\$IP \$CMD
    exit \$rc
  fi
else
  echo "\$IP: Cannot Ping host? (Host Alive?)"
  exit 1
fi
SCRIPTFILE
    chmod +x "$_scriptFile"
  }
  # Execute the generated per-host script (global scriptFile) across the
  # cluster, then delete the script file.
  #   - RUN_CLUSTER_DISPLAY_OUTPUT is "FALSE" and hasPython is 1: appends
  #     "-n ${concurrent}" to the global OPTIONS and runs cluster_script.py.
  #   - otherwise: calls run_cluster with the script, 0 and $1; if hasPython is 0
  #     a notice about the missing python version is printed first.
  # Arguments: $1 - forwarded to run_cluster as its third argument (optional)
  # Returns:   the status of whichever runner was invoked.
  runScript() {
    local _status

    if [[ "$RUN_CLUSTER_DISPLAY_OUTPUT" != "FALSE" || $hasPython -ne 1 ]]; then
      if [[ $hasPython -eq 0 ]]; then
        echo ""
        echo "Cannot detect python version ${expected_python_version}+. Will run on the cluster hosts sequentially."
        echo ""
      fi
      run_cluster ${scriptFile} 0 $1
      _status=$?
    else
      OPTIONS="${OPTIONS} -n ${concurrent}"
      eval ${INSTALL_DIR}/sbin/cluster_script.py -f ${scriptFile} "$OPTIONS"
      _status=$?
    fi

    rm -rf $scriptFile &>/dev/null
    return $_status
  }
  # Handle the "setup" command: prepare the run via init, generate the per-host
  # script for "<action> <args> setup", run it and print the collected report.
  # Sets the global scriptFile, which runScript reads.
  doSetup() {
    local _verb=setup

    init "${_verb}"
    scriptFile=~/${action}_${_verb}_$$
    createScript "$scriptFile" "$action" $args "${_verb}"
    runScript
    report "${action} ${_verb}"
  }
  # Handle the "status" command: prepare the run via init, generate the per-host
  # script for "<action> <args> status", run it and print the collected report.
  # Sets the global scriptFile, which runScript reads.
  doStatus() {
    local _verb=status

    init "${_verb}"
    scriptFile=~/${action}_${_verb}_$$
    createScript "$scriptFile" "$action" $args "${_verb}"
    runScript
    report "${action} ${_verb}"
  }
  # Handle the "stop" command.
  #   - comp set:   run the stop script on every host in IPsFile and report.
  #   - comp empty: run the stop script on the hosts in IPsExcludeDIP (when that
  #                 file exists and is non-empty), report while skipping $DIP,
  #                 then stop each address in DIP one at a time via doOneIP;
  #                 a doOneIP failure terminates the script through `end 1`.
  # Sets the globals scriptFile and OPTIONS, which runScript reads.
  doStop() {
    echo "$action stopping the cluster ..."
    init stop
    scriptFile=~/${action}_stop_$$

    if [[ -z "${comp}" ]]; then
      if [[ -e $IPsExcludeDIP ]]; then
        local _hostCount
        _hostCount=$(wc -l $IPsExcludeDIP | awk '{ print $1 }')
        if [[ $_hostCount -gt 0 ]]; then
          createScript "$scriptFile" "$action" $args stop
          OPTIONS="${DEFAULT_OPTIONS} -h $IPsExcludeDIP"
          runScript $IPsExcludeDIP
          report "${action} stop" $DIP
        fi
      fi

      # The addresses in DIP are handled last, after the other hosts.
      for _dip in $DIP; do
        if ! doOneIP $_dip $action $args stop; then
          end 1
        fi
      done
    else
      createScript "$scriptFile" "$action" $args stop
      OPTIONS="${DEFAULT_OPTIONS} -h $IPsFile"
      runScript $IPsFile
      report "${action} stop"
    fi
  }
  # Handle the "stopall" command: run doStop with the global action set to
  # "hpcc-init" and then to "dafilesrv". The global action is left as
  # "dafilesrv" afterwards.
  doStopall() {
    for action in "hpcc-init" "dafilesrv"; do
      doStop
    done
  }
  # Handle the "start" command.
  #   - comp set:   start on every host in IPsFile.
  #   - comp empty: first start each address in DIP one at a time via doOneIP
  #                 (a failure terminates through `end 1`), then start the
  #                 remaining hosts listed in IPsExcludeDIP.
  # The host file is only processed when it exists and is non-empty. If the
  # cluster run reports a non-zero status, the report is still printed and the
  # script then terminates through end() with that status.
  # Sets the globals scriptFile and OPTIONS, which runScript reads.
  doStart() {
    init start

    local startFile
    if [[ -n "${comp}" ]]; then
      startFile=$IPsFile
    else
      local _dip
      for _dip in $DIP; do
        doOneIP $_dip $action $args start || end 1
      done
      startFile=$IPsExcludeDIP
    fi

    [[ -e $startFile ]] || return 0

    local numIPs
    numIPs=$(wc -l "$startFile" | awk '{ print $1 }')
    [[ $numIPs -gt 0 ]] || return 0

    echo "$action starting the cluster ..."
    scriptFile=~/${action}_start_$$
    createScript $scriptFile $action $args start
    OPTIONS="${DEFAULT_OPTIONS} -h $startFile"
    runScript $startFile
    # Take the status from runScript itself; its own rc variable is local and
    # is not visible here.
    local rc=$?

    if [[ -n "${comp}" ]]; then
      report "${action} start"
    else
      report "${action} start" $DIP
    fi

    if [[ $rc -ne 0 ]]; then
      end $rc
    fi
  }
  # Prepare the globals and files for one command run.
  # Arguments: $1 - command name, used as a path component of the report dir
  # Effects:
  #   - resolves IPS and DIP (getIPS, getDali);
  #   - writes the host list to IPsFile and the list without the DIP
  #     addresses to IPsExcludeDIP (both under /tmp, suffixed with this PID);
  #   - if concurrent is 0, sets it to the number of lines in IPsFile;
  #   - sets dateTime and reportDir, creates reportDir and chowns its parent
  #     directory recursively to ${user}.
  init() {
    local _command=$1

    getIPS
    getDali

    IPsFile=/tmp/ip_list_$$
    IPsExcludeDIP=/tmp/ip_list_exclude_dip_$$
    createIPListFile "$IPsFile"
    createIPListFileExcludeDIP "$IPsFile" "$IPsExcludeDIP"

    if (( concurrent == 0 )); then
      concurrent=$(wc -l "$IPsFile" | awk '{ print $1 }')
    fi

    dateTime=$(date +"%Y%m%d_%H%M%S")
    reportDir=/var/log/HPCCSystems/cluster/${_command}/${dateTime}
    mkdir -p $reportDir
    chown -R ${user}:${user} ${reportDir}/..
  }
  # Print the per-host results collected in reportDir.
  # Arguments:
  #   $1 - title printed after each host name
  #   $2 - host (file name) to leave out of the report (optional)
  # Only active when RUN_CLUSTER_DISPLAY_OUTPUT is "FALSE". For each file in
  # reportDir, lines containing "ervice dafilesrv" and blank lines are dropped;
  # hosts with nothing left are not printed.
  report() {
    local _heading=$1
    local _skip=$2

    [[ "$RUN_CLUSTER_DISPLAY_OUTPUT" = "FALSE" ]] || return 0

    local _node _text
    while read _node; do
      if [[ "$_node" != "$_skip" ]]; then
        _text=$(grep -v "ervice dafilesrv" "${reportDir}/$_node" | grep -v -e "^[[:space:]]*$")
        if [[ -n "$_text" ]]; then
          echo "$_node $_heading :"
          echo -e "$_text\n"
        fi
      fi
    done < <(ls ${reportDir})
  }
  # Clean up and terminate the script.
  # Arguments: $1 - exit status (defaults to 0 when omitted)
  # Globals read: save, reportDir, IPsFile, IPsExcludeDIP
  #   - If a report directory was set up: keep it and announce its location
  #     when save is 1, otherwise delete it. Nothing is printed or removed when
  #     reportDir is still empty (e.g. usage or option errors before init ran).
  #   - Removes the temporary host list files if they exist.
  end() {
    if [[ -n "${reportDir}" ]]; then
      if [[ ${save} -eq 1 ]]; then
        echo "Cluster status is saved under $reportDir"
        echo
      else
        rm -rf -- "${reportDir}"
      fi
    fi

    if [[ -e "${IPsExcludeDIP}" ]]; then
      rm -rf -- "${IPsExcludeDIP}"
    fi
    if [[ -e "${IPsFile}" ]]; then
      rm -rf -- "${IPsFile}"
    fi

    exit "${1:-0}"
  }
  # Report whether a `systemd` command can be found on PATH.
  # Uses the shell builtin `command -v`, so it does not depend on the external
  # `which` utility and performs the lookup only once.
  # Globals written: systemd_path - set to the resolved path on success only.
  # Returns: 0 if found, 1 otherwise.
  use_systemd() {
    local _found
    if _found=$(command -v systemd 2>/dev/null); then
      systemd_path=${_found}
      return 0
    fi
    return 1
  }
  300. ############################################
  301. #
  302. # MAIN
  303. #
  304. ############################################
  # Script entry point: initialise the cluster tooling, check the invoking user,
  # parse the command line, pick the default action, then run each requested
  # command in the order given and exit through end().
  cluster_tools_init

  # Only root or the configured runtime user may run this script.
  if [[ "$(whoami)" != "root" && "$(whoami)" != "${user}" ]]; then
    echo ""
    echo "The script must run as root, $user or sudo."
    echo ""
    exit 1
  fi

  envfile=$configs/$environment
  configfile=${CONFIG_DIR}/${ENV_CONF_FILE}
  hasPython=0
  save=0
  expected_python_version=3.4
  if is_python_installed $expected_python_version; then
    hasPython=1
  fi
  concurrent=0
  RUN_CLUSTER_DISPLAY_OUTPUT=FALSE
  DEFAULT_OPTIONS="-e $configfile -s ${SECTION:-DEFAULT}"

  # -a/--action is only accepted when systemd is not in use.
  if use_systemd; then
    _short_opts="c:n:sShfd"
    _long_opts="help,comp:,save,concurrent:,sequentially,force,debug"
  else
    _short_opts="a:c:n:sShfd"
    _long_opts="help,comp:,action:,save,concurrent:,sequentially,force,debug"
  fi

  if ! TEMP=$(/usr/bin/getopt -o "${_short_opts}" --long "${_long_opts}" -n 'hpcc-run' -- "$@"); then
    echo "Failure to parse commandline." >&2
    end 1
  fi
  eval set -- "$TEMP"

  # Options that must reach the per-host command are accumulated in args.
  while true; do
    case "$1" in
      -c|--comp)
        comp=$2
        args="${args:+${args} }-c $2"
        shift 2
        ;;
      -a|--action)
        action=$2
        shift 2
        ;;
      -n|--concurrent)
        # Ignored unless a positive integer, or once -S has forced concurrent to 1.
        if [[ -n "$2" && $2 =~ ^[1-9][0-9]*$ && $concurrent -ne 1 ]]; then
          concurrent=$2
        fi
        shift 2
        ;;
      -S|--sequentially)
        concurrent=1
        RUN_CLUSTER_DISPLAY_OUTPUT=TRUE
        shift
        ;;
      -s|--save)
        save=1
        shift
        ;;
      -f|--force)
        args="${args:+${args} }-f"
        shift
        ;;
      -d|--debug)
        args="${args:+${args} }-d"
        shift
        ;;
      -h|--help)
        print_usage
        shift
        ;;
      --)
        shift
        break
        ;;
      *)
        print_usage
        ;;
    esac
  done

  # Validate the action, or choose the default when none was given.
  case "$action" in
    hpcc-init|dafilesrv)
      ;;
    "")
      if use_systemd; then
        action="hpccsystems-platform.target"
      else #sysv init
        action="hpcc-init"
      fi
      ;;
    *)
      print_usage
      ;;
  esac

  # arg is deliberately global: doOneIP and createScript read it.
  for arg in "$@"; do
    case "$arg" in
      start)   doStart ;;
      stop)    doStop ;;
      stopall) doStopall ;;
      restart)
        doStop
        doStart
        ;;
      status)  doStatus ;;
      setup)   doSetup ;;
      *)       print_usage ;;
    esac
  done

  end 0