Bläddra i källkod

Merge pull request #7423 from Michael-Gardner/HPCC-13706

HPCC-13706 Modified unlock to be done within component init scripts.

Reviewed-By: Xiaoming Wang <xiaoming.wang@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 10 år sedan
förälder
incheckning
39aae82fe4

+ 76 - 64
initfiles/bash/etc/init.d/hpcc_common.in

@@ -390,18 +390,31 @@ startCmd() {
     logDir=$log/${compName}
 
     if [ ${noStatusCheck} -ne 1 ]; then
-      check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 1
-      RCSTART=$?
-      if [ ${RCSTART} -gt 1 ];then
-        # take care of failure message in check_status function
-        cleanupRuntimeEnvironment
-      fi
-      if [ ${RCSTART} -eq 0 ]; then
-        #Since component is already started but current script is failed till returning 0
-        log "$compName ---> already started"
-        log_success_msg "Already Started"
-        return ${RCSTART}
-      fi
+        check_status ${PIDPATH} ${LOCKPATH} ${COMPPIDPATH} 1
+        RCSTART=$?
+        if [ ${RCSTART} -gt 1 ];then
+            # take care of failure message in check_status function
+            checkPidExist $PIDPATH
+            local initRunning=$__pidExists
+            checkPidExist $COMPPIDPATH
+            local compRunning=$__pidExists
+            if [[ $compRunning -eq 1 || $initRunning -eq 1 ]]; then
+                log "Orphaned Process"
+                cleanup_component
+                if [[ $? -eq 1 ]]; then
+                    log_failure_msg
+                    return 1
+                else
+                     cleanupRuntimeEnvironment
+                fi
+            fi
+        fi
+        if [ ${RCSTART} -eq 0 ]; then
+            #Since component is already started but current script is failed till returning 0
+            log "$compName ---> already started"
+            log_success_msg "Already Started"
+            return ${RCSTART}
+        fi
     fi
 
     limits=(
@@ -517,8 +530,6 @@ stop_component() {
 
     eval $stopcmd
 
-    unlock ${LOCKPATH}
-
     RESULT=0
     local waittime=30
     [[ $compType = "dali" ]] && waittime=720
@@ -910,57 +921,58 @@ cluster_tools_init() {
 ##
 cleanup_component() {
 
-  # used to get variables for frunssh
-  # Necessary for when we source in setvars, since we aren't using the start-stop-demon the $HOME will
-  # be set to /root or some other location, and not the appropriate directory, causing problems
-  # with $SSHidentityfile
-  set_environmentvars
-  HOME=${home}/${user}
-  instancedir=${runtime}/${compName}
-  if [ -e $instancedir/setvars ]; then
-    source $instancedir/setvars
-  fi
-
-  # grab the PID of our component, and in the case it doesn't exist, the pid of the init file (in case
-  # it somehow is still alive)
-  local cpidpath=${COMPPIDPATH}
-  if [ "${compType}" = "thor" ] && [ ! -f "${cpidpath}" ]; then
-    # if run_thor is sent a SIGKILL, the normal COMPPIDPATH file will no longer exist, we catch this and
-    # fall back to the {compName}_master.pid file that will still be available.
-    cpidpath=$( echo $cpidpath | sed 's/\.pid/_master.pid/' )
-  fi
-  getPid ${cpidpath}
-  local mpid=$__pidValue
-  if [ $mpid -eq 0 ]; then
-    getPid ${PIDPATH}
-    mpid=$__pidValue
-  fi
-
-  # start with SIGTERM and then follow up with SIGKILL if unsuccessful
-  if [ $mpid -ne 0 ] ; then
-    # use the mpid we collected to grab the group pid of the process, to kill off all siblings at once
-    local pgid=$( ps -p $mpid -o pid,pgid | grep $mpid | awk '{ print $2 }' )
-
-    kill -15 -$pgid
-    sleep 1
-    local WAITTIME=5
-    local RUNNING=1
-    while [[ ${WAITTIME} -gt 0 ]]; do
-      WAITTIME=`expr ${WAITTIME} - 1`
-      kill -0 -$pgid &> /dev/null
-      if [ $? -ne 0 ];then
-        log "${compName} orphans cleaned up"
-        RUNNING=0
-        break;
-      else
-        log "Waiting for ${compName} orphans to cleanup gracefully"
+    # used to get variables for frunssh
+    # Necessary for when we source in setvars, since we aren't using the start-stop-demon the $HOME will
+    # be set to /root or some other location, and not the appropriate directory, causing problems
+    # with $SSHidentityfile
+    set_environmentvars
+    HOME=${home}/${user}
+    instancedir=${runtime}/${compName}
+    if [ -e $instancedir/setvars ]; then
+        source $instancedir/setvars
+    fi
+
+    # grab the PID of our component, and in the case it doesn't exist, the pid of the init file (in case
+    # it somehow is still alive)
+    local cpidpath=${COMPPIDPATH} RUNNING=0  # RUNNING is the return status; 0 unless a process group survives SIGTERM
+    if [ "${compType}" = "thor" ] && [ ! -f "${cpidpath}" ]; then
+        # if run_thor is sent a SIGKILL, the normal COMPPIDPATH file will no longer exist, we catch this and
+        # fall back to the {compName}_master.pid file that will still be available.
+        cpidpath=$( echo $cpidpath | sed 's/\.pid/_master.pid/' )
+    fi
+    getPid ${cpidpath}
+    local mpid=$__pidValue
+    if [ $mpid -eq 0 ]; then
+        getPid ${PIDPATH}
+        mpid=$__pidValue
+    fi
+
+    # start with SIGTERM and then follow up with SIGKILL if unsuccessful
+    if [ $mpid -ne 0 ] ; then
+        # use the mpid we collected to grab the group pid of the process, to kill off all siblings at once
+        local pgid=$( ps -p $mpid -o pid,pgid | grep $mpid | awk '{ print $2 }' )
+
+        kill -15 -$pgid
         sleep 1
-      fi
-    done
+        local WAITTIME=5
+        RUNNING=1  # assume the group is alive until kill -0 reports otherwise
+        while [[ ${WAITTIME} -gt 0 ]]; do
+            WAITTIME=$(( WAITTIME - 1 ))
+            kill -0 -$pgid &> /dev/null
+            if [ $? -ne 0 ];then
+                log "${compName} orphans cleaned up"
+                RUNNING=0
+                break;
+            else
+                log "Waiting for ${compName} orphans to cleanup gracefully"
+                sleep 1
+            fi
+        done
 
-    if [[ ${RUNNING} -eq 1 ]]; then
-      log "sending SIGKILL to ${compName} orphans"
-      kill -9 -$pgid
+        if [[ ${RUNNING} -eq 1 ]]; then
+            log "sending SIGKILL to ${compName} orphans"
+            kill -9 -$pgid  # NOTE(review): RUNNING stays 1, so status 1 is returned even after SIGKILL - confirm intended
+        fi
     fi
-  fi
+    return $RUNNING  # 0: no pid found or group exited after SIGTERM; 1: group outlived the SIGTERM wait
 }

+ 5 - 2
initfiles/bin/init_configesp

@@ -22,6 +22,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="configesp.sentinel"
@@ -33,9 +35,10 @@ killed()
     log "attempting to kill $component"
     kill_process ${PID_NAME} configesp 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "Stopped $component"
+        log "Stopped $component"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 5 - 2
initfiles/bin/init_dafilesrv.in

@@ -33,6 +33,8 @@ source ${INSTALL_DIR}/etc/init.d/hpcc_common
 export handlelimit=32768
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="dafilesrv.sentinel"
@@ -44,9 +46,10 @@ killed()
     log "Attempting to kill $component"
     kill_process ${PID_NAME} dafilesrv 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "$component Stopped"
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 5 - 2
initfiles/bin/init_dali

@@ -21,6 +21,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="daserver.sentinel"
@@ -34,9 +36,10 @@ killed()
     log "Attempting to kill $component"
     kill_process ${PID_NAME} daserver 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "$component Stopped"
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 5 - 2
initfiles/bin/init_dfuserver

@@ -21,6 +21,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="dfuserver.sentinel"
@@ -32,9 +34,10 @@ killed()
     log "Attempting to kill $component"
     kill_process ${PID_NAME} dfuserver 15 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "$component Stopped"
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 5 - 2
initfiles/bin/init_eclagent.in

@@ -23,6 +23,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="agentexec.sentinel"
@@ -37,9 +39,10 @@ killed ()
     log "Attempting to kill $component"
     kill_process ${PID_NAME} agentexec 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "$component Stopped"
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 8 - 0
initfiles/bin/init_eclccserver

@@ -21,6 +21,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="eclccserver.sentinel"
@@ -30,6 +32,12 @@ rm -f ${SENTINEL}
 killed()
 {
     kill_process ${PID_NAME} eclccserver 3 ${SENTINEL}
+    if [[ $? -eq 1 ]]; then  # status 1 from kill_process is handled as "kill failed"; the lock is left in place
+        log "could not kill $component"
+    else
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock  # NOTE(review): hard-coded lock dir; $lock set from dir.getByName earlier in this script is unused - confirm
+    fi
     exit 255
 }
 

+ 5 - 2
initfiles/bin/init_eclscheduler

@@ -21,6 +21,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="eclscheduler.sentinel"
@@ -32,9 +34,10 @@ killed()
     log "Attempting to kill $component"
     kill_process ${PID_NAME} eclscheduler 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "$component Stopped"
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 8 - 0
initfiles/bin/init_esp

@@ -21,6 +21,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="esp.sentinel"
@@ -32,6 +34,12 @@ SNMPID=$$
 killed()
 {
     kill_process ${PID_NAME} esp 15 ${SENTINEL}
+    if [[ $? -eq 1 ]]; then  # status 1 from kill_process is handled as "kill failed"; the lock is left in place
+        log "could not kill $component"
+    else
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock  # NOTE(review): hard-coded lock dir; $lock set from dir.getByName earlier in this script is unused - confirm
+    fi
     exit 255
 }
 

+ 5 - 2
initfiles/bin/init_roxie

@@ -21,6 +21,8 @@ PID_NAME="$PID/$(basename $PWD).pid"
 source ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="roxie.sentinel"
@@ -59,9 +61,10 @@ killed()
     log "Attempting to kill $component"
     kill_process ${PID_NAME} roxie 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "$component Stopped"
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 5 - 2
initfiles/bin/init_sasha

@@ -23,6 +23,8 @@ INSTALL_DIR="$(dirname ${PATH_PRE})/.."
 source  ${INSTALL_DIR}/etc/init.d/hpcc_common
 
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 export logfile="${LOG_DIR}/${component}/init_${component}_$(date +%Y_%m_%d_%H_%M_%S).log"
 
 export SENTINEL="saserver.sentinel"
@@ -34,9 +36,10 @@ killed()
     log "Attempting to kill $component"
     kill_process ${PID_NAME} saserver 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
-      log "could not kill $component"
+        log "could not kill $component"
     else
-      log "$component Stopped"
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
     fi
     exit 255
 }

+ 11 - 3
initfiles/bin/init_thor

@@ -22,6 +22,8 @@ source ${PATH_PRE}
 INSTALL_DIR=$(dirname ${PATH_PRE})/..
 source  ${INSTALL_DIR}/etc/init.d/hpcc_common
 component=$(basename $PWD)
+dir.getByName lock
+lock="$dir_return"
 
 PID_NAME="$PID/${component}.pid"
 
@@ -112,9 +114,15 @@ killed()
 {
     log "Stopping ${component}"
     kill_process ${PID_NAME} thormaster_${component} 30
-    kill_slaves
-    log "removing init.pid file and uslaves.start file"
-    rm -f $INIT_PID_NAME $instancedir/uslaves.start > /dev/null 2>&1
+    if [[ $? -eq 1 ]]; then  # 1 == kill_process failed, the same test the other init_* killed() handlers use
+        log "could not kill $component"
+    else
+        log "$component Stopped"
+        unlock /var/lock/HPCCSystems/$component/${component}.lock
+        kill_slaves
+        log "removing init.pid file and uslaves.start file"
+        rm -f $INIT_PID_NAME $instancedir/uslaves.start > /dev/null 2>&1
+    fi
     exit 255
 }