소스 검색

Make backupnode not require the slaves file on slaves

Backupnode required the slaves file to exist on all
slaves. In the new active group scheme, the slaves
file is dynamic and only exists on the master.
Change backupnode and its start script to pass the relevant
info to the child backup processes on the command line.

+ Fix a redirection problem that caused output from
the child backupnode processes to be lost.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 13 년 전
부모
커밋
65f04c7673
3개의 변경된 파일, 41개의 추가 그리고 70개의 삭제
  1. 3 0
      common/remote/rmtssh.cpp
  2. 5 4
      initfiles/componentfiles/thor/start_backupnode.in
  3. 33 66
      tools/backupnode/backupnode.cpp

+ 3 - 0
common/remote/rmtssh.cpp

@@ -89,6 +89,9 @@ class CFRunSSH: public CInterface, implements IFRunSSH
                 case 'x': // Next Node
                     cmdbuf.append(slaves.item((nodenum+replicationoffset)%slaves.ordinality()));
                     break;
+                case 'c':
+                    cmdbuf.append(slaves.ordinality());
+                    break;
                 case 't': // Tree Node
                     if (treefrom)
                         cmdbuf.append(slaves.item(treefrom-1));

+ 5 - 4
initfiles/componentfiles/thor/start_backupnode.in

@@ -49,7 +49,7 @@ RUN_DIR=`cat ${HPCC_CONFIG} | sed -n "/\[DEFAULT\]/,/\[/p" | grep "^runtime=" |
 INSTANCE_DIR=$RUN_DIR/$1
 
 if [ ! -e $INSTANCE_DIR ] ; then
-  # perhaps they gave a full path? 
+  # perhaps they gave a full path?
   if [ ! -e $1 ] ; then
     echo Usage: $0 thor_cluster_name
     exit 1
@@ -82,8 +82,8 @@ mkdir -p $BACKUPNODE_DATA
 rm -f $BACKUPNODE_DATA/*.ERR
 rm -f $BACKUPNODE_DATA/*.DAT
 
-echo Using backupnode directory $BACKUPNODE_DATA 
-echo Reading slaves file $INSTANCE_DIR/slaves 
+echo Using backupnode directory $BACKUPNODE_DATA
+echo Reading slaves file $INSTANCE_DIR/slaves
 echo Scanning files from dali ...
 
 NODEGROUP=$THORPRIMARY
@@ -103,7 +103,8 @@ if [ $? -ne 0 ]; then
 fi
 
 frunssh $INSTANCE_DIR/slaves "killall backupnode" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
-frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA $INSTANCE_DIR/slaves %n $2 > $LOGPATH/${LOGDATE}_node%n.log'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
+echo frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
+frunssh $INSTANCE_DIR/slaves "/bin/sh -c 'mkdir -p `dirname $LOGPATH/${LOGDATE}_node%n.log`; mkdir -p $INSTANCE_DIR; $DEPLOY_DIR/backupnode -T -X $BACKUPNODE_REMOTEDATA %n %c %a %x $2 > $LOGPATH/${LOGDATE}_node%n.log 2>&1'" -i:$SSHidentityfile -u:$SSHusername -pe:$SSHpassword -t:$SSHtimeout -a:$SSHretries -b >> $LOGFILE 2>&1
 
 echo ------------------------------
 sleep 5

+ 33 - 66
tools/backupnode/backupnode.cpp

@@ -209,9 +209,8 @@ static void usage()
 {
     printf("\nBACKUPNODE sourcepath targetpath [options]\n");
     printf("   Copies and optionally compresses files from source to target\n\n");
-    printf("BACKUPNODE -T slavesfile slaveno path1 path2 path3...\n");
-    printf("   Thor node backup mode - syncs named paths with adjacent d: drive\n\n");
-    printf("   if no paths specified use DAT files in directory specified by -X\n\n");
+    printf("BACKUPNODE -X <data-dir-path> -T slaveno numslaves myip backupip\n");
+    printf("   Thor node backup mode - syncs named paths with adjacent drive\n\n");
     printf("BACKUPNODE -W slavesfile dir\n");
     printf("   Waits for .ERR files in the specified directory then concatenates into a log file\n\n");
     printf("BACKUPNODE -O daliip cluster outdir\n");
@@ -633,18 +632,14 @@ int main(int argc, const char *argv[])
     {
         if (thorMode)
         { 
-            bool usedatfile=false;
+            if (args.ordinality()<4 || 0 == errdatdir.length())
+                usage();
+            slaveNum = atoi(args.item(0));
+            numSlaves = atoi(args.item(1));
+            const char *myIp = args.item(2);
+            const char *backupIp = args.item(3);
+
             setDaliServixSocketCaching(true); 
-            slaveNum = (args.ordinality()<2)?0:atoi(args.item(1));
-            if (args.ordinality()<3) {
-                if ((errdatdir.length()==0)||!slaveNum)
-                {
-                    printerr("-T option specified but no paths and no data dir/slave number specified");
-                    throw MakeStringException(MSGAUD_operator, 0, "-T option specified but no paths and no data dir/slave number specified");
-                }
-                usedatfile = true;
-            }
-            loadSlaves(args.item(0));
             if (!slaveNum || slaveNum>numSlaves)
             {
                 printerr("'%s' is not a valid slave number (range is 1 to %d)", args.item(1), numSlaves);
@@ -654,7 +649,7 @@ int main(int argc, const char *argv[])
             {
                 IpAddress myip;
                 GetHostIp(myip);
-                IpAddress myipfromSlaves(slaveIP[slaveNum-1]);
+                IpAddress myipfromSlaves(myIp);
                 if (!myip.ipequals(myipfromSlaves))
                 {
                     StringBuffer ips1, ips2;
@@ -664,61 +659,33 @@ int main(int argc, const char *argv[])
                     throw MakeStringException(-1, "IP address %d in slaves file %s does not match this machine %s", slaveNum, ips1.str(), ips2.str());
                 }
             }
-            if (usedatfile) {
-                StringBuffer datafile(errdatdir);
-                addPathSepChar(datafile).append(slaveNum).append(".DAT");
-                Owned<IFile> file = createIFile(datafile.str());
-                Owned<IFileIO> fio;
-                // add a slight stagger
-                Sleep(slaveNum*200);
-                for (unsigned attempt=0;attempt<10;attempt++) {
-                    try {
-                        fio.setown(file->open(IFOread));
-                        if (fio) 
-                            break;
-                    }
-                    catch (IException *e) {
-                        if (attempt==9) {
-                            StringBuffer msg;
-                            e->errorMessage(msg);
-                            printerr("%s",msg.str());
-                        }
-                        e->Release();
-                    }
-                    Sleep(5000);
+            StringBuffer datafile(errdatdir);
+            addPathSepChar(datafile).append(slaveNum).append(".DAT");
+            Owned<IFile> file = createIFile(datafile.str());
+            Owned<IFileIO> fio;
+            // add a slight stagger
+            Sleep(slaveNum*200);
+            for (unsigned attempt=0;attempt<10;attempt++) {
+                try {
+                    fio.setown(file->open(IFOread));
+                    if (fio)
+                        break;
                 }
-                if (fio) 
-                    applyPartsFile(fio,syncFile);
-                else {
-                    printerr("Could not read file %s",datafile.str());
-                    throw MakeStringException(-1, "Could not read file %s",datafile.str());
+                catch (IException *e) {
+                    if (attempt==9) {
+                        StringBuffer msg;
+                        e->errorMessage(msg);
+                        printerr("%s",msg.str());
+                    }
+                    e->Release();
                 }
+                Sleep(5000);
             }
+            if (fio)
+                applyPartsFile(fio,syncFile);
             else {
-                aindex_t numArgs = args.ordinality();
-                for (aindex_t idx = 2; idx<numArgs; ++idx)
-                {
-                    const char *arg = args.item(idx);
-
-                    StringBuffer backupDirectory;
-                    StringBuffer localDirectory;
-#ifdef _WIN32
-                    backupDirectory.append("\\\\").append(slaveIP[slaveNum]).append("\\d$\\").append(arg);
-                    localDirectory.append("\\\\").append(slaveIP[slaveNum-1]).append("\\c$\\").append(arg);
-#else
-                    if (useMirrorMount) 
-                        backupDirectory.append(unixmirror.get()).append("/").append(arg);
-                    else
-                        backupDirectory.append("//").append(slaveIP[slaveNum]).append("/d$/").append(arg);
-                    localDirectory.append("/c$/").append(arg);
-#endif
-                    if (compressExisting)
-                        CompressDirectory(localDirectory.str(), numSlaves, compress);
-                    CopyDirectory(backupDirectory.str(), localDirectory.str(), numSlaves, compress, false);
-                    if (compressExisting)
-                        CompressDirectory(backupDirectory.str(), numSlaves, compress);
-                    CopyDirectory(localDirectory.str(), backupDirectory.str(), numSlaves, compress, true);
-                }
+                printerr("Could not read file %s",datafile.str());
+                throw MakeStringException(-1, "Could not read file %s",datafile.str());
             }
         }
         else if (waitMode) {