瀏覽代碼

Merge pull request #10977 from mckellyln/parallel_init

HPCC-19329 Allow parallel init for faster start/stop

Reviewed-By: Jake Smith <jake.smith@lexisnexis.com>
Reviewed-By: Michael Gardner <michael.gardner@lexisnexis.com>
Reviewed-By: Gavin Halliday <gavin.halliday@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 年之前
父節點
當前提交
dad5df957a

+ 33 - 11
dali/dalistop/dalistop.cpp

@@ -26,13 +26,17 @@
 int main(int argc, char* argv[])
 {
     InitModuleObjects();
-    try {
-        if (argc<2) {
+    int exitCode = 1;
+    try
+    {
+        if (argc<2)
+        {
             printf("usage: dalistop <server_ip:port> [/nowait]\n");
             printf("eg:  dalistop .                          -- stop dali server running locally\n");
             printf("     dalistop eq0001016                  -- stop dali server running remotely\n");
         }
-        else {
+        else
+        {
             SocketEndpoint ep;
             ep.set(argv[1],DALI_SERVER_PORT);
             bool nowait = false;
@@ -45,26 +49,44 @@ int main(int argc, char* argv[])
             CMessageBuffer mb;
             int fn=-1;
             mb.append(fn);
-            if (comm->verifyConnection(0,2000)) {
+            if (comm->verifyConnection(0,2000))
+            {
                 comm->send(mb,0,MPTAG_DALI_COVEN_REQUEST,MP_ASYNC_SEND);
                 if (nowait)
+                {
                     Sleep(1000);
+                    exitCode = 0;
+                }
                 else
-                    while (comm->verifyConnection(0,1000)) {
-                        PROGLOG("Waiting for Dali Server to stop....");
-                        Sleep(5000);
+                {
+                    // verifyConnection() has a min conn timeout of 10s
+                    // use recv() instead to check for socket closed ...
+                    try
+                    {
+                        while (!comm->recv(mb,0,MPTAG_DALI_COVEN_REQUEST,nullptr,5000))
+                        {
+                            printf("Waiting for Dali Server to stop....\n");
+                        }
+                        exitCode = 0;
                     }
+                    catch (IMP_Exception *e)
+                    {
+                        if (e->errorCode() == MPERR_link_closed)
+                            exitCode = 0;
+                        e->Release();
+                    }
+                }
             }
             else
-                PROGLOG("Dali not responding");
+                fprintf(stderr, "Dali not responding\n");
             stopMPServer();
         }
     }
-    catch (IException *e) {
+    catch (IException *e)
+    {
         pexception("Exception",e);
         stopMPServer();
     }
     releaseAtoms();
-    return 0;
+    return exitCode;
 }
-

+ 2 - 0
initfiles/bash/etc/init.d/dafilesrv.in

@@ -57,6 +57,8 @@ function print_usage {
     exit 0
 }
 
+cfggenpre=()
+
 source  ${INSTALL_DIR}/etc/init.d/lock.sh
 source  ${INSTALL_DIR}/etc/init.d/pid.sh
 source  ${INSTALL_DIR}/etc/init.d/hpcc_common

+ 18 - 8
initfiles/bash/etc/init.d/hpcc-init.in

@@ -89,6 +89,7 @@ function print_types {
 }
 
 
+cfggenpre=()
 
 source  ${INSTALL_DIR}/etc/init.d/lock.sh
 source  ${INSTALL_DIR}/etc/init.d/pid.sh
@@ -277,15 +278,24 @@ if [ -z $arg ] || [ $# -ne 1 ]; then
     print_usage
 fi
 
-log "Debug log written to $LOG_DIR/hpcc-init.debug"
-[ -e $LOG_DIR/hpcc-init.debug ] && rm -rf ${LOG_DIR}/hpcc-init.debug
-touch ${LOG_DIR}/hpcc-init.debug
-chown ${user}:${group} ${LOG_DIR}/hpcc-init.debug
-PS4='+\011$(date "+%T.%N")\011'
-exec 3>&2 2>$LOG_DIR/hpcc-init.debug
-set -x
+thisos=$(uname -s)
+if [[ "$thisos" = "Linux" && -f "${envfile}" && -n "${component}" && \
+    "${DEBUG}" = "NO_DEBUG" && -z "$HPCC_NO_FLOCK" ]] ; then
+    cfggenpre=(flock ${envfile})
+fi
+
+# verbose output of all shell cmds to .debug file only if -d specified on cmdline
+if [[ "${DEBUG}" != "NO_DEBUG" ]] ; then
+    log "Debug log written to $LOG_DIR/hpcc-init.debug"
+    [ -e $LOG_DIR/hpcc-init.debug ] && rm -rf ${LOG_DIR}/hpcc-init.debug
+    touch ${LOG_DIR}/hpcc-init.debug
+    chown ${user}:${group} ${LOG_DIR}/hpcc-init.debug
+    PS4='+\011$(date "+%T.%N")\011'
+    exec 3>&2 2>$LOG_DIR/hpcc-init.debug
+    set -x
+fi
 
-if [ -z ${component} ]; then
+if [ -z "${component}" ]; then
     for (( i=0; i<=${compListLen}; i++ ));do
         component="$component ${compList[$i]}"
     done

+ 16 - 13
initfiles/bash/etc/init.d/hpcc_common.in

@@ -274,7 +274,9 @@ set_componentvars() {
 }
 
 validate_configuration() {
-    if ! validation_error=$(${configgen_path}/configgen -env ${envfile} -validateonly 2>&1); then
+    validation_error=$(${cfggenpre[@]} ${configgen_path}/configgen -env ${envfile} -validateonly 2>&1)
+    rc=$?
+    if [[ $rc -ne 0 ]]; then
         log  "validate_configuration(): validation failure ${envfile}"
         log  "${validation_error}"
         echo -e "\033[31merror\033[0m: configgen xml validation failure"
@@ -284,7 +286,7 @@ validate_configuration() {
 
 get_commondirs() {
     componentFile="${path}/componentfiles/configxml"
-    DIRS=$(${configgen_path}/configgen -env ${envfile} -id ${componentFile} -listcommondirs)
+    DIRS=$(${cfggenpre[@]} ${configgen_path}/configgen -env ${envfile} -id ${componentFile} -listcommondirs)
     rc=$?
     if [[ $rc -ne 0 ]]; then
         log  "get_commondirs(): failure in configgen call"
@@ -300,7 +302,7 @@ configGenCmd() {
     # Creating logfiles for component
     logDir=$log/${compName}
 
-    configcmd="${configgen_path}/configgen -env ${envfile} -od ${runtime} -id ${componentFile} -c ${compName}"
+    configcmd="${cfggenpre[@]} ${configgen_path}/configgen -env ${envfile} -od ${runtime} -id ${componentFile} -c ${compName}"
     log "$configcmd"
     if [ "$(whoami)" != "${user}" ]; then
         su ${user} -c "$configcmd" 2>/dev/null
@@ -571,15 +573,16 @@ startCmd() {
         if [[ ${RCSTART} -eq 0 ]]; then
             log_success_msg
             return 0;
-        fi
-        checkPidExist $PIDPATH
-        local initRunning=$__pidExists
-        if [[ $initRunning -eq 0 ]]; then
-            log "${compName} failed to start cleanly"
-            log "Refer to the log file for the binary ${compName} for more information"
-            log_failure_msg
-            cleanupRuntimeEnvironment
-            return 1;
+        elif [[ ${RCSTART} -ne 4 ]]; then
+            checkPidExist $PIDPATH
+            local initRunning=$__pidExists
+            if [[ $initRunning -eq 0 ]]; then
+                log "${compName} failed to start cleanly"
+                log "Refer to the log file for the binary ${compName} for more information"
+                log_failure_msg
+                cleanupRuntimeEnvironment
+                return 1;
+            fi
         fi
         sleep 1
     done
@@ -739,7 +742,7 @@ setup_component() {
 create_dropzone() {
     OIFS=${IFS}
     unset IFS
-    dropzones=$(${configgen_path}/configgen -env ${envfile} -listdirs)
+    dropzones=$(${cfggenpre[@]} ${configgen_path}/configgen -env ${envfile} -listdirs)
     rc=$?
     if [[ $rc -ne 0 ]]; then
         log  "create_dropzone(): failure in configgen call"

+ 3 - 2
initfiles/bash/etc/init.d/init-functions

@@ -201,7 +201,7 @@ log_timeout_msg () {
 # general logging message for init scripts
 # expects $logfile to exist within the context of where it's called
 log() {
-  if [[ -z ${logfile+x} ]]; then
+  if [[ -z "${logfile}" ]]; then
     # logfile isn't set within the context of this function call
     return 1
   fi
@@ -209,7 +209,8 @@ log() {
   local msg=$@
   local header=$( date +%Y_%m_%d_%H_%M_%S )
   local header="${header}: "
-  (printf "%s%s\n" "$header" "$msg" >> $logfile) 2> /dev/null
+  local thispid=$$
+  (printf "%d %s%s\n" "$thispid" "$header" "$msg" >> $logfile) 2> /dev/null
   if [[ $? -ne 0 ]]; then
     echo "unable to write to ${logfile}" 1>&2
     return 1