backup.sh.in 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/bin/bash
  2. ################################################################################
  3. # HPCC SYSTEMS software Copyright (C) 2017 HPCC Systems®.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. ################################################################################
  # Script setup: load the HPCC helper libraries, validate the arguments and
  # route diagnostics for this backup job.
  #
  # Expects exactly two positional arguments:
  #   1: target cluster
  #   2: logfile
  #
  # Exits with status 1 (after appending a line to backup.sh.err) when the
  # argument count is wrong.
  source @INSTALL_DIR@/etc/init.d/hpcc_common
  set_environmentvars
  source @ADMIN_PATH@/hpcc_setenv ""

  # The component name is taken from the directory the script is started in.
  component=$(basename "${PWD}")

  if [[ $# -ne 2 ]]; then
    echo "Incorrect number of arguments to backup.sh ($*) [$$]" >> "@LOG_PATH@/${component}/backup.sh.err"
    exit 1
  fi

  target="$1"
  logfile="$2"

  # From here on stderr goes to a per-target debug file; with `set -x` enabled
  # the command trace ends up there as well.
  exec 2>"@LOG_PATH@/${component}/${target}.debug"
  set -x

  # Prefix used on every log line. It must be assigned before the trap is
  # installed so the termination message carries the target and PID.
  header="${target} $$ --"

  # On SIGINT/SIGTERM: signal the children of this shell, log, and exit cleanly.
  # Single quotes defer expansion until the handler actually runs.
  trap 'pkill -P $$; log "${header} Terminated"; exit 0' SIGINT SIGTERM

  which_pidof
  # Load backupnode.conf for this component and activate the section that
  # belongs to the requested target. Exits with status 1 when the file is missing.
  #safe as background job in subshell
  if [[ ! -e "@RUNTIME_PATH@/${component}/backupnode.conf" ]]; then
    log "${header} Cannot find backupnode.conf"
    exit 1
  fi

  cfg.parser "@RUNTIME_PATH@/${component}/backupnode.conf"
  # The section loader is a command whose name is derived from the target.
  "cfg.section.${target}"

  # Node group handed to backupnode: thorprimary when it is non-empty,
  # otherwise thorname.
  # NOTE(review): both variables are presumably set by the section loaded
  # above - confirm against the cfg.parser implementation.
  nodegroup=${thorprimary:-${thorname}}
  # Periodic backup loop.
  #
  # Each cycle: wait ${interval} hours, resolve the backupnode data directory,
  # fetch the slave list from dali, run the local backupnode scan, then use
  # frunssh to stop stale backupnode processes on the slaves and start new ones.
  # Any failing step logs a message and leaves the loop.
  #
  # Uses variables assigned outside this block: header, interval, component,
  # target, logfile, daliserver, nodegroup, thormaster, PIDOF and the SSH*
  # settings.
  # NOTE(review): logpthtail is not assigned anywhere in the visible script;
  # presumably it comes from a sourced file - confirm.

  # Without an interval `sleep h` fails immediately and the loop below would
  # run backups back-to-back, so refuse to start.
  if [[ -z "${interval}" ]]; then
    log "${header} No backup interval configured"
    exit 1
  fi

  log "${header} Backup will occur every ${interval} hours"

  slaves_file="@RUNTIME_PATH@/${component}/${target}.slaves"
  # maximum number of threads frunssh will be permitted to use (capped by # slaves)
  MAXTHREADS=1000

  while true; do
    # Sleep in the background and block in `wait` instead: bash runs a trap
    # handler as soon as `wait` is interrupted, whereas a foreground command
    # would delay it until that command finished.
    sleep "${interval}h" &
    wait

    log "${header} Starting backup of cluster"
    backupnode_data=$(@EXEC_PATH@/updtdalienv @CONFIG_DIR@/@ENV_XML_FILE@ -d data backupnode "${target}")
    if [[ -z "${backupnode_data}" ]]; then
      log "${header} Cannot determine backupnode directory"
      break
    fi

    # Fetch thorgroup slave machines from dali
    if ! @EXEC_PATH@/daliadmin "server=${daliserver}" dfsgroup "${target}" "${slaves_file}"; then
      log "${header} Failed to lookup dfsgroup"
      break
    fi

    backupnode_data="${backupnode_data}/${target}/last_backup"
    backupnode_remotedata="//${thormaster}${backupnode_data}"

    # Start each cycle from a directory with no leftover .ERR/.DAT files.
    mkdir -p "${backupnode_data}"
    rm -f "${backupnode_data}"/*.ERR
    rm -f "${backupnode_data}"/*.DAT

    log "${header} Using backupnode directory $backupnode_data"
    log "${header} Reading slaves file ${slaves_file}"
    log "${header} Scanning files from dali ..."
    if ! @EXEC_PATH@/backupnode -O "${daliserver}" "${nodegroup}" "${backupnode_data}" >> "${logfile}" 2>&1; then
      log "${header} Backupnode failed"
      break
    fi

    # Options shared by both frunssh invocations.
    frunssh_opts=(
      "-i:${SSHidentityfile}"
      "-u:${SSHusername}"
      "-pe:${SSHpassword}"
      "-t:${SSHtimeout}"
      "-a:${SSHretries}"
      "-n:${MAXTHREADS}"
      -b
    )

    # Stop any backupnode still running on the slaves.
    fout=$(frunssh "${slaves_file}" "killall backupnode 1>/dev/null 2>&1" "${frunssh_opts[@]}" 2>&1)
    if [[ -n "${fout}" ]]; then
      log "${header} ${fout}"
    fi

    # Command run on every slave; the %n %c %a %x tokens are passed through
    # unchanged for frunssh to handle.
    remote_cmd="/bin/sh -c 'mkdir -p @LOG_PATH@/${component}; mkdir -p @RUNTIME_PATH@; @EXEC_PATH@/backupnode -T -X ${backupnode_remotedata} %n %c %a %x ${logfile} > @LOG_PATH@/${component}/${target}_backup_node_%n_${logpthtail}.log 2>&1'"

    # NOTE(review): this log line (and the `set -x` trace, if enabled) writes
    # the SSH password in clear text - consider masking it.
    log "${header} frunssh ${slaves_file} \"${remote_cmd}\" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -n:$MAXTHREADS -b >> $logfile 2>&1"
    fout=$(frunssh "${slaves_file}" "${remote_cmd}" "${frunssh_opts[@]}" 2>&1)
    if [[ -n "${fout}" ]]; then
      log "${header} ${fout}"
    fi

    log "${header} Waiting for backup to complete"
    # NOTE(review): the logged command line ends in `&`, but the command below
    # runs in the foreground, so the loop blocks here until it returns.
    # Confirm which of the two is intended.
    log "${header} nohup backupnode -W ${slaves_file} $backupnode_data >> $logfile 2>&1 &"
    nohup backupnode -W "${slaves_file}" "${backupnode_data}" >> "${logfile}" 2>&1

    # PIDOF is left unquoted on purpose in case it holds a command plus
    # options - TODO confirm against which_pidof.
    pid=$(${PIDOF} backupnode)
    log "${header} Backupnode process $pid still continuing"
  done