Sfoglia il codice sorgente

Merge pull request #9207 from Michael-Gardner/HPCC-16398

HPCC-16398 Force kill after sigterm attempt (exception for dali)

Reviewed-By: Mark Kelly <mark.kelly@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 8 anni fa
parent
commit
cea5b23c2a
3 file modificati con 30 aggiunte e 13 eliminazioni
  1. initfiles/bin/init_dali.in (+1 -1)
  2. initfiles/bin/init_thor.in (+7 -11)
  3. initfiles/sbin/hpcc_setenv.in (+22 -1)

+ 1 - 1
initfiles/bin/init_dali.in

@@ -33,7 +33,7 @@ killed()
     log "calling dalistop ."
     dalistop .
     log "Attempting to kill $component"
-    kill_process ${PID_NAME} daserver 3 ${SENTINEL}
+    dali_kill_process ${PID_NAME} daserver 3 ${SENTINEL}
     if [[ $? -eq 1 ]]; then
         log "could not kill $component"
     else

+ 7 - 11
initfiles/bin/init_thor.in

@@ -81,7 +81,7 @@ kill_slaves()
                 log "Please check ${LOG_DIR}/frunssh for more details"
                 # clean up any slaves it was able to reach
                 log "Stopping ${component}"
-                kill_process ${PID_NAME} thormaster_${component} 30
+                kill_process ${PID_NAME} thormaster_${component} 25
                 unlock ${LOCK_DIR}/$component/${component}.lock
                 rm -f $INIT_PID_NAME $instancedir/slaves > /dev/null 2>&1
                 exit 255
@@ -99,16 +99,12 @@ kill_slaves()
 killed()
 {
     log "Stopping ${component}"
-    kill_process ${PID_NAME} thormaster_${component} 30
-    if [[ $? -eq 1 ]]; then
-        log "could not kill $component"
-    else
-        log "$component Stopped"
-        unlock ${LOCK_DIR}/$component/${component}.lock
-        kill_slaves
-        log "removing init.pid file and slaves file"
-        rm -f $INIT_PID_NAME $instancedir/slaves > /dev/null 2>&1
-    fi
+    kill_process ${PID_NAME} thormaster_${component} 25
+    log "$component Stopped"
+    unlock ${LOCK_DIR}/$component/${component}.lock
+    kill_slaves
+    log "removing init.pid file and slaves file"
+    rm -f $INIT_PID_NAME $instancedir/slaves > /dev/null 2>&1
     exit 255
 }
 

+ 22 - 1
initfiles/sbin/hpcc_setenv.in

@@ -31,7 +31,21 @@ function kill_process () {
       [[ -e $SENTINEL ]] && rm -f $SENTINEL
     fi
     if [[ -e $PID ]]; then
-        pidwait_fn $PID $PROCESS $TIMEOUT
+        pidwait_fn $PID $PROCESS $TIMEOUT 1
+        local RC_PIDWAIT=$?
+        return $RC_PIDWAIT
+    fi
+}
+function dali_kill_process () {
+    local PID=$1
+    local PROCESS=$2
+    local TIMEOUT=$3
+    if [[ $# -eq 4 ]]; then
+      SENTINEL=$4
+      [[ -e $SENTINEL ]] && rm -f $SENTINEL
+    fi
+    if [[ -e $PID ]]; then
+        pidwait_fn $PID $PROCESS $TIMEOUT 0
         local RC_PIDWAIT=$?
         return $RC_PIDWAIT
     fi
@@ -42,6 +56,7 @@ function pidwait_fn () {
     local WATCH_PID=$( cat $PID )
     local PROCESS=$2
     local TIMEOUT=$(($3*1000))
+    local FORCE=$4
 
     if ps -p $WATCH_PID -o command= | grep -v "${PROCESS}" &>/dev/null ; then
       return 0
@@ -55,6 +70,12 @@ function pidwait_fn () {
 
     if [[ $TIMEOUT -le 0 ]] && ps -p $WATCH_PID -o command= | grep "${PROCESS}" &>/dev/null ; then
       log "Failed to kill ${PROCESS} with SIGTERM"
+      if [[ $FORCE -eq 1 ]]; then
+        kill -SIGKILL $WATCH_PID > /dev/null 2>&1
+        log "killing ${PROCESS} (pid: $WATCH_PID) with SIGKILL"
+        rm -f $PID > /dev/null 2>&1
+        return 0
+      fi
       return 1
     fi