Browse Source

HPCC-15059 Added check to return value of ssh call for throwing exception

Signed-off-by: Michael Gardner <michael.gardner@lexisnexis.com>
Michael Gardner 9 years ago
parent
commit
2830475ac1
2 changed files with 14 additions and 7 deletions
  1. common/remote/rmtssh.cpp (+3 -7)
  2. initfiles/bin/init_thor (+11 -0)

+ 3 - 7
common/remote/rmtssh.cpp

@@ -520,7 +520,7 @@ public:
                 StringBuffer res(replytext.item(n));
                 while (res.length()&&(res.charAt(res.length()-1)<=' '))
                     res.setLength(res.length()-1);
-                if (res.length()==0)
+                if (res.length()==0 && !reply.item(n))
                     PROGLOG("%d: %s(%d): [OK]",n+1,slaves.item(n),reply.item(n));
                 else if (strchr(res.str(),'\n')==NULL) {
                     PROGLOG("%d: %s(%d): %s",n+1,slaves.item(n),reply.item(n),res.str());
@@ -545,12 +545,8 @@ public:
             while (res.length()&&(res.charAt(res.length()-1)<=' '))
                 res.setLength(res.length()-1);
             PROGLOG("%s result(%d):\n%s",useplink?"plink":"ssh",reply.item(0),res.str());
-            if (res.length()) {
-                int code = reply.item(0);
-                if (code == 0)
-                    code = -1;
-                throw MakeStringExceptionDirect(code, res.str());
-            }
+            if (reply.item(0))
+                throw MakeStringExceptionDirect(reply.item(0), res.str());
         }
     }
     void exec(

+ 11 - 0
initfiles/bin/init_thor

@@ -77,6 +77,17 @@ kill_slaves()
         if [[ -r $instancedir/uslaves ]]; then
             clusternodes=$(cat $instancedir/uslaves 2> /dev/null | wc -l)
             $deploydir/frunssh $instancedir/slaves "/bin/sh -c '$deploydir/init_thorslave stop localhost $slavespernode $THORSLAVEPORT $slaveportinc $THORMASTER $THORMASTERPORT $LOG_DIR $instancedir $deploydir $THORNAME $PATH_PRE $logredirect'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -n:$clusternodes 2>&1
+            FRUNSSH_RC=$?
+            if [[ ${FRUNSSH_RC} -gt 0 ]]; then
+                log "Error ${FRUNSSH_RC} in frunssh"
+                log "Please check $(dirname ${LOG_DIR})/frunssh for more details"
+                # clean up any slaves it was able to reach
+                log "Stopping ${component}"
+                kill_process ${PID_NAME} thormaster_${component} 30
+                unlock /var/lock/HPCCSystems/$component/${component}.lock
+                rm -f $INIT_PID_NAME $instancedir/slaves > /dev/null 2>&1
+                exit 255
+            fi
         fi
     fi