
HPCC-21388 Automatically add spark-env.sh and fix MASTER_IP and SPARK_LOCAL_IP

Signed-off-by: Michael Gardner <michael.gardner@lexisnexisrisk.com>

HPCC-21388 Rename generated spark-env.sh to spark-hpcc-env.sh

Signed-off-by: Michael Gardner <michael.gardner@lexisnexisrisk.com>
Michael Gardner 6 years ago
commit efa1451617

+ 1 - 1
initfiles/bash/etc/init.d/export-path

@@ -15,6 +15,6 @@
 #    limitations under the License.
 ################################################################################
 currentDirectory=`pwd`
-sourceCmd="export \"PATH=${currentDirectory}:${PATH}\""
+sourceCmd="export \"PATH=${currentDirectory}:/usr/sbin:/usr/local/sbin:${PATH}\""
 eval $sourceCmd
 ##echo "path = $PATH"
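
Presumably the extra /usr/sbin and /usr/local/sbin entries are there so that administrative tools such as ip, which the generated spark-hpcc-env.sh below relies on, still resolve when the init scripts run with a trimmed PATH. A minimal sketch of the effect, assuming a hypothetical /opt/HPCCSystems install prefix:

    # illustrative only: sourcing export-path from /opt/HPCCSystems/etc/init.d
    # (hypothetical location) yields a PATH beginning with
    #   /opt/HPCCSystems/etc/init.d:/usr/sbin:/usr/local/sbin:<previous PATH>
    command -v ip   # now found even if the caller's PATH lacked /usr/sbin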

+ 1 - 1
initfiles/componentfiles/configxml/cgencomplist_linux.xml.in

@@ -52,7 +52,7 @@
     <File name="backupnode_vars.xsl" method="xslt" destName="backupnode.conf"/>
   </Component>
   <Component name="sparkthor" processName="SparkThor" schema="sparkThor.xsd">
-    <File name="sparkThor.xsl" method="xslt" destName="spark-env.sh"/>
+    <File name="sparkThor.xsl" method="xslt" destName="spark-hpcc-env.sh"/>
     <File name="spark-defaults.xsl" method="xslt" destName="spark-defaults.conf"/>
   </Component>
   <Component name="ldapServer" processName="LDAPServerProcess">

+ 6 - 0
plugins/spark/CMakeLists.txt

@@ -142,6 +142,12 @@ if(SPARK)
         COMPONENT runtime
         DESTINATION "externals/spark-hadoop/conf"
         )
+    install(
+        FILES
+            ${CMAKE_CURRENT_SOURCE_DIR}/spark-env.install
+        COMPONENT runtime
+        DESTINATION "etc/init.d/install"
+        )
 
     configure_file("${CMAKE_CURRENT_SOURCE_DIR}/sparkthor.sh.in" "${CMAKE_CURRENT_BINARY_DIR}/sparkthor.sh" @ONLY)
     configure_file("${CMAKE_CURRENT_SOURCE_DIR}/sparkthor-worker.sh.in" "${CMAKE_CURRENT_BINARY_DIR}/sparkthor-worker.sh" @ONLY)
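
The new install() rule ships spark-env.install with the runtime component so it sits alongside the platform's other post-install hooks. A sketch of where it would land, assuming the conventional /opt/HPCCSystems prefix (illustrative, not confirmed by this diff):

    # after installing the runtime package, the hook would be found at, e.g.:
    #   /opt/HPCCSystems/etc/init.d/install/spark-env.install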

+ 25 - 0
plugins/spark/spark-env.install

@@ -0,0 +1,25 @@
+
+mkdir -p ${CONFIG_DIR}/rpmnew
+mkdir -p ${CONFIG_DIR}/externals/spark-hadoop
+
+printf "Installing %-44s ..." "spark-env.sh"
+
+if [ ! -e ${CONFIG_DIR}/spark-env.sh ]; then
+    # Always install new files without comment
+    cp -f ${INSTALL_DIR}/externals/spark-hadoop/conf/spark-env.sh ${CONFIG_DIR}/externals/spark-hadoop/spark-env.sh
+    cp -f ${INSTALL_DIR}/externals/spark-hadoop/conf/spark-env.sh ${CONFIG_DIR}/rpmnew/spark-env.sh
+    log_success_msg
+elif [ -e ${CONFIG_DIR}/rpmnew/spark-env.sh ] && ! `diff -q ${CONFIG_DIR}/rpmnew/spark-env.sh ${INSTALL_DIR}/externals/spark-hadoop/conf/spark-env.sh >/dev/null` ; then
+    # There are changes in the default config since last installed
+    if ! `diff -q ${CONFIG_DIR}/rpmnew/spark-env.sh ${CONFIG_DIR}/externals/spark-hadoop/spark-env.sh >/dev/null` ; then
+        # User has made their own changes too, so don't overwrite
+        log_failure_msg "Not overwriting modified configuration file spark-env.sh"
+    else
+        # User has NOT made their own changes - ok to update
+        cp -f ${INSTALL_DIR}/externals/spark-hadoop/conf/spark-env.sh ${CONFIG_DIR}/externals/spark-hadoop/spark-env.sh
+        cp -f ${INSTALL_DIR}/externals/spark-hadoop/conf/spark-env.sh ${CONFIG_DIR}/rpmnew/spark-env.sh
+        log_success_msg "Updated configuration file spark-env.sh"
+    fi
+else
+    log_success_msg "No changes to configuration file spark-env.sh"
+fi
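
The fragment assumes it is sourced by an installer wrapper that already defines CONFIG_DIR, INSTALL_DIR and the log_*_msg helpers; that wiring is not part of this diff. A minimal, hypothetical sketch of such a wrapper, only to show where those names would come from:

    #!/bin/bash
    # hypothetical post-install driver -- not part of this commit
    CONFIG_DIR=/etc/HPCCSystems           # assumed configuration prefix
    INSTALL_DIR=/opt/HPCCSystems          # assumed installation prefix
    log_success_msg() { echo "  [  OK  ] $*"; }
    log_failure_msg() { echo "  [FAILED] $*"; }

    # source every install hook shipped under etc/init.d/install
    for hook in "${INSTALL_DIR}"/etc/init.d/install/*.install; do
        [ -r "${hook}" ] && source "${hook}"
    done

The three-way check in the hook follows the usual rpmnew convention: a pristine copy of the last-installed default is kept under ${CONFIG_DIR}/rpmnew so that an upgrade can tell user edits apart from stale defaults and avoid overwriting them.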

+ 13 - 7
plugins/spark/spark-env.sh.in

@@ -67,11 +67,17 @@
 # - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
 # - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS
 
-ALL_IPS=$(@ADMIN_PATH@/configgen -env @CONFIG_DIR@/@ENV_XML_FILE@ -listall2 | awk -F , '{print $3}' | sort | uniq)
-LOCAL_IPS=$(ip addr show | grep inet | grep -v inet6 | awk '{print $2}' | awk -F / '{print $1}' | grep -v 127.0.0.1 | sort)
-export SPARK_LOCAL_IP=($(comm -12 <(printf '%s\n' "${LOCAL_IPS[@]}") <(printf '%s\n' "${ALL_IPS[@]}")))
+if [[ -e "@CONFIG_DIR@/@ENV_CONF_FILE@" ]]; then
+    interface=$(cat @CONFIG_DIR@/@ENV_CONF_FILE@ | awk -F= '/^interface/ {print $2;}')
+    if [[ "${interface}" == "*" ]]; then
+        unset interface
+    fi
+fi
 
-
-SPARK_MASTER_HOST=$(@ADMIN_PATH@/configgen -env @CONFIG_DIR@/@ENV_XML_FILE@ -listall -t esp | awk -F , 'NR==1{print $3}')
-SPARK_WORKER_CORES=1
-SPARK_WORKER_MEMORY=4g
+if [[ -z ${interface:+x} ]]; then 
+    ALL_IPS=$(@ADMIN_PATH@/configgen -env @CONFIG_DIR@/@ENV_XML_FILE@ -listall2 | awk -F , '{print $3}' | sort | uniq)
+    LOCAL_IPS=$(ip addr show | grep inet | grep -v inet6 | awk '{print $2}' | awk -F / '{print $1}' | grep -v 127.0.0.1 | sort)
+    export SPARK_LOCAL_IP=$(comm -12 <(printf '%s\n' "${LOCAL_IPS[@]}") <(printf '%s\n' "${ALL_IPS[@]}"))
+else
+    export SPARK_LOCAL_IP=$(ip address show dev ${interface} | grep inet | grep -v inet6 | awk '{print $2;}' | awk -F / '{print $1;}') 
+fi
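
The generated script now prefers the interface setting from environment.conf and only falls back to intersecting the node's local addresses with the addresses configgen reports when interface is unset or "*". For illustration, assuming a host whose environment.conf pins interface=eth0 and whose eth0 carries 10.0.0.12 (both hypothetical values):

    # hypothetical excerpt of @CONFIG_DIR@/@ENV_CONF_FILE@ (environment.conf)
    interface=eth0

    # the generated spark-hpcc-env.sh would then effectively run:
    ip address show dev eth0 | grep inet | grep -v inet6 | awk '{print $2;}' | awk -F / '{print $1;}'
    # -> 10.0.0.12, so SPARK_LOCAL_IP=10.0.0.12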

+ 4 - 4
plugins/spark/sparkthor-worker.sh.in

@@ -33,20 +33,20 @@ cd @RUNTIME_PATH@/${_component}
 
 MASTER_IP="$(@ADMIN_PATH@/configgen -env @CONFIG_DIR@/@ENV_XML_FILE@ -listall -c ${_component} | awk -F "," '{print $3;}')"
     
-log "rsync -e \"ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no\" --timeout=60 ${MASTER_IP}:@RUNTIME_PATH@/${_component}/spark-env.sh @RUNTIME_PATH@/${_component}/spark-env.sh"
+log "rsync -e \"ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no\" --timeout=60 ${MASTER_IP}:@RUNTIME_PATH@/${_component}/spark-hpcc-env.sh @RUNTIME_PATH@/${_component}/spark-hpcc-env.sh"
 rsync_att=3
 rsync_stat=1
 while [[ $rsync_stat -ne 0 && $rsync_att -gt 0 ]] ; do
-    rsync -e "ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no" --timeout=60 ${MASTER_IP}:@RUNTIME_PATH@/${_component}/spark-env.sh spark-env.sh
+    rsync -e "ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no" --timeout=60 ${MASTER_IP}:@RUNTIME_PATH@/${_component}/spark-hpcc-env.sh spark-hpcc-env.sh
     rsync_stat=$?
     ((rsync_att--))
     log "rsync returns ${rsync_stat}"
 done
-if [ ! -f @RUNTIME_PATH@/${_component}/spark-env.sh ] ; then
+if [ ! -f @RUNTIME_PATH@/${_component}/spark-hpcc-env.sh ] ; then
     log "Error, $slavesfname file missing"
     exit 1
 fi
-source @RUNTIME_PATH@/${_component}/spark-env.sh
+source @RUNTIME_PATH@/${_component}/spark-hpcc-env.sh
 
 MASTER_URL="spark://${MASTER_IP}:${SPARK_MASTER_PORT}"
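
Once the renamed file has been pulled from the master and sourced, the worker builds its master URL from the SPARK_MASTER_PORT defined there. With a master at 10.0.0.10 and Spark's default port 7077 (hypothetical values), the line above would yield:

    # illustrative result only
    MASTER_IP=10.0.0.10
    SPARK_MASTER_PORT=7077    # assumed to be exported by spark-hpcc-env.sh
    MASTER_URL="spark://${MASTER_IP}:${SPARK_MASTER_PORT}"   # -> spark://10.0.0.10:7077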
 

+ 1 - 1
plugins/spark/sparkthor.sh.in

@@ -26,7 +26,7 @@ _component=${2:-mysparkthor}
 export logfile="@LOG_PATH@/${_component}/sparkthor.log"
 source @INSTALL_DIR@/sbin/hpcc_setenv
 source @INSTALL_DIR@/etc/init.d/hpcc_common
-source ./spark-env.sh
+source ./spark-hpcc-env.sh
 
 # update slaves file in case state of environment has been altered since last run
 errorMessage=$( @EXEC_PATH@/daliadmin server=$DALISERVER clusternodes ${NODEGROUP} @RUNTIME_PATH@/${_component}/slaves 2>&1 )