فهرست منبع

Make backupnode not require the slaves file on slaves

Backupnode required the slaves file to exist on all
slaves. In the new active group scheme, the slaves
file is dynamic and only exists on the master.
Change backupnode and its start script to pass the relevant
info to the child backup processes instead.

+ Fix a redirection problem that caused output from
the child backupnode processes to be lost.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 13 سال پیش
والد
کامیت
65f04c7673
3 فایلهای تغییر یافته به همراه 41 افزوده شده و 70 حذف شده
  1. 3 0
      common/remote/rmtssh.cpp
  2. 5 4
      initfiles/componentfiles/thor/start_backupnode.in
  3. 33 66
      tools/backupnode/backupnode.cpp

+ 3 - 0
common/remote/rmtssh.cpp

@@ -89,6 +89,9 @@ class CFRunSSH: public CInterface, implements IFRunSSH
                 case 'x': // Next Node
                     cmdbuf.append(slaves.item((nodenum+replicationoffset)%slaves.ordinality()));
                     break;
+                case 'c':
+                    cmdbuf.append(slaves.ordinality());
+                    break;
                 case 't': // Tree Node
                     if (treefrom)
                         cmdbuf.append(slaves.item(treefrom-1));

+ 5 - 4
initfiles/componentfiles/thor/start_backupnode.in

@@ -49,7 +49,7 @@ RUN_DIR=`cat ${HPCC_CONFIG} | sed -n "/\[DEFAULT\]/,/\[/p" | grep "^runtime=" |
 INSTANCE_DIR=$RUN_DIR/$1
 
 if [ ! -e $INSTANCE_DIR ] ; then
-  # perhaps they gave a full path? 
+  # perhaps they gave a full path?
   if [ ! -e $1 ] ; then
     echo Usage: $0 thor_cluster_name
     exit 1
@@ -82,8 +82,8 @@ mkdir -p $BACKUPNODE_DATA
 rm -f $BACKUPNODE_DATA/*.ERR
 rm -f $BACKUPNODE_DATA/*.DAT
 
-echo Using backupnode directory $BACKUPNODE_DATA 
-echo Reading slaves file $INSTANCE_DIR/slaves 
+echo Using backupnode directory $BACKUPNODE_DATA
+echo Reading slaves file $INSTANCE_DIR/slaves
 echo Scanning files from dali ...
 
 NODEGROUP=$THORPRIMARY
@@ -103,7 +103,8 @@ if [ $? -ne 0 ]; then
 fi
 
 frunssh $INSTANCE_DIR/slaves "killall backupnode" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
-frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA $INSTANCE_DIR/slaves %n $2 > $LOGPATH/${LOGDATE}_node%n.log'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
+echo frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
+frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
 
 echo ------------------------------
 sleep 5

+ 33 - 66
tools/backupnode/backupnode.cpp

@@ -209,9 +209,8 @@ static void usage()
 {
     printf("\nBACKUPNODE sourcepath targetpath [options]\n");
     printf("   Copies and optionally compresses files from source to target\n\n");
-    printf("BACKUPNODE -T slavesfile slaveno path1 path2 path3...\n");
-    printf("   Thor node backup mode - syncs named paths with adjacent d: drive\n\n");
-    printf("   if no paths specified use DAT files in directory specified by -X\n\n");
+    printf("BACKUPNODE -X <data-dir-path> -T slaveno numslaves myip backupip\n");
+    printf("   Thor node backup mode - syncs named paths with adjacent drive\n\n");
     printf("BACKUPNODE -W slavesfile dir\n");
     printf("   Waits for .ERR files in the specified directory then concatenates into a log file\n\n");
     printf("BACKUPNODE -O daliip cluster outdir\n");
@@ -633,18 +632,14 @@ int main(int argc, const char *argv[])
     {
         if (thorMode)
         { 
-            bool usedatfile=false;
+            if (args.ordinality()<4 || 0 == errdatdir.length())
+                usage();
+            slaveNum = atoi(args.item(0));
+            numSlaves = atoi(args.item(1));
+            const char *myIp = args.item(2);
+            const char *backupIp = args.item(3);
+
             setDaliServixSocketCaching(true); 
-            slaveNum = (args.ordinality()<2)?0:atoi(args.item(1));
-            if (args.ordinality()<3) {
-                if ((errdatdir.length()==0)||!slaveNum)
-                {
-                    printerr("-T option specified but no paths and no data dir/slave number specified");
-                    throw MakeStringException(MSGAUD_operator, 0, "-T option specified but no paths and no data dir/slave number specified");
-                }
-                usedatfile = true;
-            }
-            loadSlaves(args.item(0));
             if (!slaveNum || slaveNum>numSlaves)
             {
                 printerr("'%s' is not a valid slave number (range is 1 to %d)", args.item(1), numSlaves);
@@ -654,7 +649,7 @@ int main(int argc, const char *argv[])
             {
                 IpAddress myip;
                 GetHostIp(myip);
-                IpAddress myipfromSlaves(slaveIP[slaveNum-1]);
+                IpAddress myipfromSlaves(myIp);
                 if (!myip.ipequals(myipfromSlaves))
                 {
                     StringBuffer ips1, ips2;
@@ -664,61 +659,33 @@ int main(int argc, const char *argv[])
                     throw MakeStringException(-1, "IP address %d in slaves file %s does not match this machine %s", slaveNum, ips1.str(), ips2.str());
                 }
             }
-            if (usedatfile) {
-                StringBuffer datafile(errdatdir);
-                addPathSepChar(datafile).append(slaveNum).append(".DAT");
-                Owned<IFile> file = createIFile(datafile.str());
-                Owned<IFileIO> fio;
-                // add a slight stagger
-                Sleep(slaveNum*200);
-                for (unsigned attempt=0;attempt<10;attempt++) {
-                    try {
-                        fio.setown(file->open(IFOread));
-                        if (fio) 
-                            break;
-                    }
-                    catch (IException *e) {
-                        if (attempt==9) {
-                            StringBuffer msg;
-                            e->errorMessage(msg);
-                            printerr("%s",msg.str());
-                        }
-                        e->Release();
-                    }
-                    Sleep(5000);
+            StringBuffer datafile(errdatdir);
+            addPathSepChar(datafile).append(slaveNum).append(".DAT");
+            Owned<IFile> file = createIFile(datafile.str());
+            Owned<IFileIO> fio;
+            // add a slight stagger
+            Sleep(slaveNum*200);
+            for (unsigned attempt=0;attempt<10;attempt++) {
+                try {
+                    fio.setown(file->open(IFOread));
+                    if (fio)
+                        break;
                 }
-                if (fio) 
-                    applyPartsFile(fio,syncFile);
-                else {
-                    printerr("Could not read file %s",datafile.str());
-                    throw MakeStringException(-1, "Could not read file %s",datafile.str());
+                catch (IException *e) {
+                    if (attempt==9) {
+                        StringBuffer msg;
+                        e->errorMessage(msg);
+                        printerr("%s",msg.str());
+                    }
+                    e->Release();
                 }
+                Sleep(5000);
             }
+            if (fio)
+                applyPartsFile(fio,syncFile);
             else {
-                aindex_t numArgs = args.ordinality();
-                for (aindex_t idx = 2; idx<numArgs; ++idx)
-                {
-                    const char *arg = args.item(idx);
-
-                    StringBuffer backupDirectory;
-                    StringBuffer localDirectory;
-#ifdef _WIN32
-                    backupDirectory.append("\\\\").append(slaveIP[slaveNum]).append("\\d$\\").append(arg);
-                    localDirectory.append("\\\\").append(slaveIP[slaveNum-1]).append("\\c$\\").append(arg);
-#else
-                    if (useMirrorMount) 
-                        backupDirectory.append(unixmirror.get()).append("/").append(arg);
-                    else
-                        backupDirectory.append("//").append(slaveIP[slaveNum]).append("/d$/").append(arg);
-                    localDirectory.append("/c$/").append(arg);
-#endif
-                    if (compressExisting)
-                        CompressDirectory(localDirectory.str(), numSlaves, compress);
-                    CopyDirectory(backupDirectory.str(), localDirectory.str(), numSlaves, compress, false);
-                    if (compressExisting)
-                        CompressDirectory(backupDirectory.str(), numSlaves, compress);
-                    CopyDirectory(localDirectory.str(), backupDirectory.str(), numSlaves, compress, true);
-                }
+                printerr("Could not read file %s",datafile.str());
+                throw MakeStringException(-1, "Could not read file %s",datafile.str());
             }
         }
         else if (waitMode) {