Bläddra i källkod

HPCC-8248 - Allow frunssh to use more threads by default

This in particular affected backupnode, where the child processes
can take a long time. With the default frunssh limit of 10, it
meant that it would only start 10 (out of e.g. 400) slave
backupnode processes at a time, extending the overall time
significantly.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 12 år sedan
förälder
incheckning
ae17cadfe2

+ 6 - 3
initfiles/componentfiles/thor/start_backupnode.in

@@ -109,9 +109,12 @@ if [ $? -ne 0 ]; then
   exit 1
 fi
 
-frunssh $INSTANCE_DIR/slaves "killall backupnode" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
-echo frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
-frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
+# maximum number of threads frunssh will be permitted to use (capped by # slaves)
+MAXTHREADS=1000
+
+frunssh $INSTANCE_DIR/slaves "killall backupnode" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -n:$MAXTHREADS -b >> $LOGFILE 2>&1
+echo frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -n:$MAXTHREADS -b >> $LOGFILE 2>&1
+frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -n:$MAXTHREADS -b >> $LOGFILE 2>&1
 
 echo ------------------------------
 sleep 5

+ 5 - 0
services/runagent/frunssh.cpp

@@ -49,6 +49,11 @@ int main( int argc, char *argv[] )
 
 
     InitModuleObjects();
+
+#ifndef __64BIT__
+    Thread::setDefaultStackSize(0x10000);   // NB under windows requires linker setting (/stack:)
+#endif
+
     try  {
         StringBuffer logname;
         splitFilename(argv[0], NULL, NULL, &logname, NULL);

+ 2 - 2
tools/backupnode/backupnode.cpp

@@ -34,7 +34,7 @@
 
 #define USE_JLOG
 
-extern bool outputPartsFiles(const char *daliserver,const char *cluster,const char *outdir,StringBuffer &errstr);
+extern bool outputPartsFiles(const char *daliserver,const char *cluster,const char *outdir,StringBuffer &errstr,bool verbose);
 extern void applyPartsFile(IFileIO *in,void (* applyfn)(const char *,const char *));
 
 
@@ -706,7 +706,7 @@ int main(int argc, const char *argv[])
                 if (!silent)
                     println("Creating part lists, please wait...");
                 StringBuffer errstr;
-                if (!outputPartsFiles(args.item(0),args.item(1),args.item(2),errstr))
+                if (!outputPartsFiles(args.item(0),args.item(1),args.item(2),errstr,verbose))
                     throw MakeStringException(-1, "%s", errstr.str());
             }
         }

+ 2 - 1
tools/backupnode/backupnode2.cpp

@@ -361,7 +361,7 @@ public:
 
 };
 
-bool outputPartsFiles(const char *daliserver,const char *cluster,const char *outdir, StringBuffer &errstr)
+bool outputPartsFiles(const char *daliserver,const char *cluster,const char *outdir, StringBuffer &errstr, bool verbose)
 {
     errstr.clear();
     bool dalistarted;
@@ -372,6 +372,7 @@ bool outputPartsFiles(const char *daliserver,const char *cluster,const char *out
             initClientProcess(serverGroup, DCR_BackupGen, 0, NULL, NULL, 1000*60*5);
             dalistarted = true;
             CFileListWriter writer;
+            writer.verbose = verbose;
             Owned<IGroup> group = queryNamedGroupStore().lookup(cluster);
             if (group) {
                 IArrayOf<IFileIOStream> outStreams;