瀏覽代碼

gh-1492 - Change the way Thor fetches it's group

Generate all groups, including multislave groups via environment update.
Thor fetches the group from dali at each startup.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 13 年之前
父節點
當前提交
56b0277e86
共有 37 個文件被更改,包括 457 次插入462 次删除
  1. 53 0
      dali/base/daclient.cpp
  2. 3 0
      dali/base/daclient.hpp
  3. 1 1
      dali/base/dacoven.cpp
  4. 63 0
      dali/base/dacsds.cpp
  5. 1 0
      dali/base/dacsds.ipp
  6. 76 24
      dali/base/dadfs.cpp
  7. 2 2
      dali/base/dadfs.hpp
  8. 40 1
      dali/base/dasds.cpp
  9. 1 0
      dali/base/dasds.hpp
  10. 1 0
      dali/base/dasds.ipp
  11. 10 26
      dali/daliadmin/daliadmin.cpp
  12. 1 1
      dali/dfu/dfuutil.cpp
  13. 1 1
      dali/server/daserver.cpp
  14. 6 77
      dali/updtdalienv/updtdalienv.cpp
  15. 0 36
      deployment/deploy/ThorDeploymentEngine.cpp
  16. 0 4
      deployment/deploy/ThorDeploymentEngine.hpp
  17. 0 37
      deployment/deploy/thorconfiggenengine.cpp
  18. 0 4
      deployment/deploy/thorconfiggenengine.hpp
  19. 39 129
      deployment/deployutils/configenvhelper.cpp
  20. 1 1
      deployment/deployutils/configenvhelper.hpp
  21. 23 1
      esp/files/scripts/configmgr/configmgr.js
  22. 10 4
      esp/files/scripts/configmgr/navtree.js
  23. 39 59
      esp/services/WsDeploy/WsDeployService.cpp
  24. 1 1
      initfiles/componentfiles/configxml/Environment.xsd
  25. 9 0
      initfiles/componentfiles/configxml/dali.xsd
  26. 3 0
      initfiles/componentfiles/configxml/dali.xsl
  27. 9 10
      initfiles/componentfiles/configxml/setvars_linux.xsl
  28. 4 4
      initfiles/componentfiles/configxml/thor.xsd.in
  29. 0 3
      initfiles/componentfiles/configxml/thor.xsl
  30. 28 13
      initfiles/componentfiles/thor/makethorgroup
  31. 1 2
      initfiles/componentfiles/thor/run_thor
  32. 1 1
      initfiles/componentfiles/thor/start_backupnode.in
  33. 2 1
      initfiles/componentfiles/thor/start_slaves
  34. 22 9
      initfiles/componentfiles/thor/start_thor
  35. 0 6
      initfiles/etc/DIR_NAME/environment.xml.in
  36. 5 3
      system/jlib/jfile.cpp
  37. 1 1
      system/jlib/jfile.hpp

+ 53 - 0
dali/base/daclient.cpp

@@ -239,3 +239,56 @@ void disconnectLogMsgListenerFromDali()
         disconnectLogMsgListenerFromChild(&servers.queryNode(idx));
 }
 
+
+bool updateDaliEnv(IPropertyTree *env, bool forceGroupUpdate, const char *daliIp)
+{
+    Owned<IPropertyTreeIterator> dalis = env->getElements("Software/DaliServerProcess/Instance");
+    if (!dalis||!dalis->first()) {
+        fprintf(stderr,"Could not find DaliServerProcess\n");
+        return false;
+    }
+    SocketEndpoint daliep;
+    loop {
+        const char *ps = dalis->get().queryProp("@port");
+        unsigned port = ps?atoi(ps):0;
+        if (!port)
+            port = DALI_SERVER_PORT;
+        daliep.set(dalis->get().queryProp("@netAddress"),port);
+        if (daliIp && *daliIp) {
+            SocketEndpoint testep;
+            testep.set(daliIp,DALI_SERVER_PORT);
+            if (testep.equals(daliep))
+                break;
+            daliep.set(NULL,0);
+        }
+        if (!dalis->next())
+            break;
+        if (!daliep.isNull()) {
+            fprintf(stderr,"Ambiguous DaliServerProcess instance\n");
+            return false;
+        }
+    }
+    if (daliep.isNull()) {
+        fprintf(stderr,"Could not find DaliServerProcess instance\n");
+        return false;
+    }
+    SocketEndpointArray epa;
+    epa.append(daliep);
+    Owned<IGroup> group = createIGroup(epa);
+
+    bool ret = true;
+    initClientProcess(group, DCR_Util);
+    StringBuffer response;
+    if (querySDS().updateEnvironment(env, forceGroupUpdate, response))
+    {
+        StringBuffer tmp;
+        PROGLOG("Environment and node groups updated in dali at %s",daliep.getUrlStr(tmp).str());
+    }
+    else
+        ret = false;
+    if (response.length())
+        WARNLOG("%s", response.str());
+
+    closedownClientProcess();
+    return ret;
+}

+ 3 - 0
dali/base/daclient.hpp

@@ -59,6 +59,9 @@ extern da_decl void disconnectLogMsgManagerFromDali();
 extern da_decl void connectLogMsgListenerToDali();
 extern da_decl void disconnectLogMsgListenerFromDali();
 
+// initates client session and updates dali pointed to by environment, unless daliIp supplied
+extern da_decl bool updateDaliEnv(IPropertyTree *env, bool updateDaliEnv=false, const char *daliIp=NULL);
+
 
 
 // the class below fills in the Status/Servers branch

+ 1 - 1
dali/base/dacoven.cpp

@@ -38,7 +38,7 @@ extern void closedownDFS();
 // base is saved in store whenever block exhausted, so replacement coven servers can restart 
 
 // server side versioning.
-#define ServerVersion    "3.8"
+#define ServerVersion    "3.9"
 #define MinClientVersion "1.5"
 
 

+ 63 - 0
dali/base/dacsds.cpp

@@ -20,6 +20,7 @@
 #include "platform.h"
 #include <typeinfo>
 #include "jlib.hpp"
+#include "jfile.hpp"
 #include "javahash.hpp"
 #include "javahash.tpp"
 #include "jptree.ipp"
@@ -30,6 +31,7 @@
 #include "daserver.hpp"
 #include "dasess.hpp"
 #include "daclient.hpp"
+#include "dadfs.hpp"
 
 #include "dasds.ipp" // common header for client/server sds
 #include "dacsds.ipp"
@@ -1952,6 +1954,67 @@ unsigned CClientSDSManager::queryCount(const char *xpath)
     return count;
 }
 
+#define MIN_UPDTENV_SVER "3.9"
+bool CClientSDSManager::updateEnvironment(IPropertyTree *newEnv, bool forceGroupUpdate, StringBuffer &response)
+{
+    CDaliVersion serverVersionNeeded(MIN_QUERYCOUNT_SVER);
+    if (queryDaliServerVersion().compare(serverVersionNeeded) < 0)
+    {
+        // have to do the old fashioned way, from client
+        Owned<IRemoteConnection> conn = querySDS().connect("/",myProcessSession(),0, INFINITE);
+        if (conn)
+        {
+            Owned<IPropertyTree> root = conn->getRoot();
+            Owned<IPropertyTree> child = root->getPropTree("Environment");
+            if (child.get())
+            {
+                StringBuffer bakname;
+                Owned<IFileIO> io = createUniqueFile(NULL, "environment", "bak", bakname);
+                Owned<IFileIOStream> fstream = createBufferedIOStream(io);
+                toXML(child, *fstream);         // formatted (default)
+                root->removeTree(child);
+            }
+            root->addPropTree("Environment", LINK(newEnv));
+            root.clear();
+            conn->commit();
+            conn->close();
+            StringBuffer messages;
+            if (!initClusterGroups(forceGroupUpdate, messages))
+                WARNLOG("CClientSDSManager::updateEnvironment: %s", messages.str());
+            PROGLOG("Environment and node groups updated");
+        }
+        return true;
+    }
+
+    CMessageBuffer mb;
+    mb.append((int)DAMP_SDSCMD_UPDTENV);
+    newEnv->serialize(mb);
+    mb.append(forceGroupUpdate);
+
+    if (!queryCoven().sendRecv(mb, RANK_RANDOM, MPTAG_DALI_SDS_REQUEST))
+        throw MakeSDSException(SDSExcpt_FailedToCommunicateWithServer, "querying sds diagnositc info");
+
+    bool result = false;
+    StringAttr resultStr;
+    SdsReply replyMsg;
+    mb.read((int &)replyMsg);
+    switch (replyMsg)
+    {
+        case DAMP_SDSREPLY_OK:
+        {
+            mb.read(result);
+            mb.read(resultStr);
+            response.append(resultStr);
+            break;
+        }
+        case DAMP_SDSREPLY_ERROR:
+            throwMbException("SDS Reply Error ", mb);
+        default:
+            assertex(false);
+    }
+    return result;
+}
+
 //////////////
 
 ISDSManager &querySDS()

+ 1 - 0
dali/base/dacsds.ipp

@@ -403,6 +403,7 @@ public:
     virtual IPropertyTreeIterator *getElementsRaw(const char *xpath, INode *remotedali, unsigned timeout);
     virtual void setConfigOpt(const char *opt, const char *value);
     virtual unsigned queryCount(const char *xpath);
+    virtual bool updateEnvironment(IPropertyTree *newEnv, bool forceGroupUpdate, StringBuffer &response);
 
 private:
     CriticalSection crit;

+ 76 - 24
dali/base/dadfs.cpp

@@ -1287,7 +1287,7 @@ public:
                 isactive = true;
                 commit();
             }
-            catch (IException *e) {
+            catch (IException *) {
                 rollback();
                 throw;
             }
@@ -7601,7 +7601,7 @@ class CInitGroups
     unsigned defaultTimeout;
 
 
-    void addClusterGroup(const char *name,IGroup *group,const char *kind, bool realcluster, const char *dir)
+    bool addClusterGroup(const char *name,IGroup *group,const char *kind, bool realcluster, const char *dir,bool force)
     {
         StringBuffer lcname(name);
         name = lcname.trim().toLowerCase().str();
@@ -7611,9 +7611,9 @@ class CInitGroups
         IPropertyTree *old = root->queryPropTree(prop.str());
         if (group)
             clusternames.append(name);
+        bool differs=false;
         if (old) {
             // see if identical
-            bool differs=false;
             const char *oldk = old->queryProp("@kind");
             if (oldk) {
                 if (kind)
@@ -7643,6 +7643,7 @@ class CInitGroups
                     SocketEndpoint ep(pe->query().queryProp("@ip"));
                     if (!ep.equals(group->queryNode(i).endpoint())) {
                         differs = true;
+                        break;
                     }
                     i++;
                 }
@@ -7652,13 +7653,16 @@ class CInitGroups
                             old->setPropInt("@cluster",1);
                         else
                             old->removeProp("@cluster");
-                    return;
+                    return true;
                 }
             }
-            root->removeProp(prop.str()); 
+            if (!differs || force)
+                root->removeProp(prop.str());
         }
         if (!group)
-            return;
+            return false;
+        if (differs && !force)
+            return false;
         IPropertyTree *val = createPTree("Group");
         val->setProp("@name",name);
         if (realcluster)
@@ -7679,6 +7683,7 @@ class CInitGroups
         }
         gi.Release();
         root->addPropTree("Group",val);
+        return true;
     }
 
 
@@ -7760,23 +7765,45 @@ class CInitGroups
     }
 
 
-    void constructGroup(IPropertyTree& cluster,bool roxie,const char *processname, const char* defdir)
+    bool constructGroup(IPropertyTree& cluster,bool roxie,const char *processname, const char* defdir,bool force,StringBuffer &messages)
     {
         const char *groupname = cluster.queryProp("@name");
         const char *nodegroupname = cluster.queryProp("@nodeGroup");
         bool realcluster = !nodegroupname||!*nodegroupname||(strcmp(nodegroupname,groupname)==0);
         SocketEndpointArray eps;
         loadEndpoints(cluster,eps,roxie,false,processname);
+        bool ret = true;
         if (eps.ordinality()) {
-            Owned<IGroup> grp = createIGroup(eps);
-            addClusterGroup(groupname,grp,roxie?"Roxie":"Thor",realcluster,defdir);
+            Owned<IGroup> grp;
+            unsigned slavesPerNode = cluster.getPropInt("@slavesPerNode");
+            if (slavesPerNode)
+            {
+                SocketEndpointArray msEps;
+                for (unsigned s=0; s<slavesPerNode; s++)
+                {
+                    ForEachItemIn(e, eps)
+                        msEps.append(eps.item(e));
+                }
+                grp.setown(createIGroup(msEps));
+            }
+            else
+                grp.setown(createIGroup(eps));
+            if (!addClusterGroup(groupname,grp,roxie?"Roxie":"Thor",realcluster,defdir,force))
+            {
+                ret = false;
+                VStringBuffer msg("New constructed group definition for cluster %s, mismatched existing group layout", groupname);
+                WARNLOG("%s", msg.str());
+                messages.append(msg).newline();
+            }
 //          DBGLOG("GROUP: %s updated\n",groupname);
         }
+        return ret;
     }
 
-    void constructFarmGroup(IPropertyTree& cluster)
+    bool constructFarmGroup(IPropertyTree& cluster,bool force,StringBuffer &messages)
     {
         Owned<IPropertyTreeIterator> farms = cluster.getElements("RoxieFarmProcess");  // probably only one but...
+        bool ret = true;
         ForEach(*farms) {
             IPropertyTree& farm = farms->query();
             StringBuffer groupname(cluster.queryProp("@name"));
@@ -7786,10 +7813,17 @@ class CInitGroups
             loadEndpoints(farm,eps,true,true,"RoxieServerProcess");
             if (eps.ordinality()) {
                 Owned<IGroup> grp = createIGroup(eps);
-                addClusterGroup(groupname.str(),grp,"RoxieFarm",true,farm.queryProp("@dataDirectory"));
+                if (!addClusterGroup(groupname.str(),grp,"RoxieFarm",true,farm.queryProp("@dataDirectory"),force))
+                {
+                    ret = false;
+                    VStringBuffer msg("New constructed group definition for cluster %s, mismatched existing group layout", groupname.str());
+                    WARNLOG("%s", msg.str());
+                    messages.append(msg).newline();
+                }
     //          DBGLOG("GROUP: %s updated\n",groupname);
             }
-        }           
+        }
+        return ret;
     }
 
 
@@ -7801,24 +7835,30 @@ public:
         defaultTimeout = _defaultTimeout;
     }
 
-    void constructGroups()
+    bool constructGroups(bool force, StringBuffer &messages)
     {
         Owned<IRemoteConnection> conn = querySDS().connect("/Environment/Software", myProcessSession(), RTM_LOCK_READ, SDS_CONNECT_TIMEOUT);
         if (!conn)
-            return;
+            return false;
+        bool ret=true;
         IPropertyTree* root = conn->queryRoot();
         Owned<IPropertyTreeIterator> clusters;
         if (loadMachineMap()) { 
             clusters.setown(root->getElements("ThorCluster"));
             ForEach(*clusters) 
-                constructGroup(clusters->query(),false,"ThorSlaveProcess",NULL);
+            {
+                if (!constructGroup(clusters->query(),false,"ThorSlaveProcess",NULL,force,messages))
+                    ret = false;
+            }
             clusters.setown(root->getElements("RoxieCluster"));
             ForEach(*clusters) {
                 const char *dir = clusters->query().queryProp("@slaveDataDir");
                 if (!dir||!*dir)
                     dir = clusters->query().queryProp("@baseDataDir");
-                constructGroup(clusters->query(),true,"RoxieSlave",dir);
-                constructFarmGroup(clusters->query());
+                if (!constructGroup(clusters->query(),true,"RoxieSlave",dir,force,messages))
+                    ret = false;
+                if (!constructFarmGroup(clusters->query(),force,messages))
+                    ret = false;
             }
             clusters.setown(root->getElements("EclAgentProcess"));
             ForEach(*clusters) {
@@ -7838,7 +7878,13 @@ public:
                                 if (ins>1)
                                     gname.append('_').append(ins);
                                 Owned<IGroup> grp = createIGroup(1,&ep);
-                                addClusterGroup(gname.str(),grp,"hthor",true,NULL);
+                                if (!addClusterGroup(gname.str(),grp,"hthor",true,NULL,force))
+                                {
+                                    ret = false;
+                                    VStringBuffer msg("New constructed group definition for EclAgentProcess %s, mismatched existing group layout", groupname);
+                                    WARNLOG("%s", msg.str());
+                                    messages.append(msg).newline();
+                                }
                             }
                         }
                     }
@@ -7865,13 +7911,14 @@ public:
             }
         }
 //      DBGLOG("Initialized cluster groups");
+        return ret;
     }
 };
 
-void initClusterGroups(unsigned timems)
+bool initClusterGroups(bool force, StringBuffer &response, unsigned timems)
 {
     CInitGroups init(timems);
-    init.constructGroups();
+    return init.constructGroups(force, response);
 }
 
 
@@ -7880,13 +7927,16 @@ class CDaliDFSServer: public Thread, public CTransactionLogTracker, implements I
     
     bool stopped;
     unsigned defaultTimeout;
+    bool forceGroupUpdate;
+
 public:
 
     IMPLEMENT_IINTERFACE;
 
-    CDaliDFSServer()
+    CDaliDFSServer(IPropertyTree *config)
         : Thread("CDaliDFSServer"), CTransactionLogTracker(MDFS_MAX)
     {
+        forceGroupUpdate = config->getPropBool("DFS/@forceGroupUpdate");
         stopped = true;
         defaultTimeout = INFINITE; // server uses default
     }
@@ -7902,7 +7952,9 @@ public:
 
     void ready()
     {
-        initClusterGroups();
+        StringBuffer response;
+        if (!initClusterGroups(forceGroupUpdate, response)) // false indicates some groups clashed and were not updated
+            PROGLOG("DFS group initialization : %s", response.str()); // should this be a syslog?
     }
     
     void suspend()
@@ -8844,10 +8896,10 @@ bool removePhysicalFiles(IGroup *grp,const char *_filemask,unsigned short port,C
 }
 
 
-IDaliServer *createDaliDFSServer()
+IDaliServer *createDaliDFSServer(IPropertyTree *config)
 {
     assertex(!daliDFSServer); // initialization problem
-    daliDFSServer = new CDaliDFSServer();
+    daliDFSServer = new CDaliDFSServer(config);
     return daliDFSServer;
 }
 

+ 2 - 2
dali/base/dadfs.hpp

@@ -607,10 +607,10 @@ extern da_decl bool removePhysicalFiles(IGroup *grp,const char *_filemask,unsign
 
 // for server use
 interface IDaliServer;
-extern da_decl IDaliServer *createDaliDFSServer(); // called for coven members
+extern da_decl IDaliServer *createDaliDFSServer(IPropertyTree *config); // called for coven members
 
 // to initialize clustergroups after clusters change in the environment
-extern da_decl void initClusterGroups(unsigned timems=INFINITE);
+extern da_decl bool initClusterGroups(bool force, StringBuffer &response, unsigned timems=INFINITE);
 
 extern da_decl IDistributedFileTransaction *createDistributedFileTransaction(IUserDescriptor *user=NULL);
 

+ 40 - 1
dali/base/dasds.cpp

@@ -30,6 +30,7 @@
 #include "jptree.ipp"
 #include "jqueue.tpp"
 #include "dautils.hpp"
+#include "dadfs.hpp"
 
 #define DEBUG_DIR "debug"
 #define DEFAULT_KEEP_LASTN_STORES 1
@@ -1909,6 +1910,7 @@ public:
     virtual IPropertyTreeIterator *getElementsRaw(const char *xpath,INode *remotedali=NULL, unsigned timeout=MP_WAIT_FOREVER);
     virtual void setConfigOpt(const char *opt, const char *value);
     virtual unsigned queryCount(const char *xpath);
+    virtual bool updateEnvironment(IPropertyTree *newEnv, bool forceGroupUpdate, StringBuffer &response);
 
 // ISubscriptionManager impl.
     virtual void add(ISubscription *subs,SubscriptionId id);
@@ -3644,6 +3646,16 @@ int CSDSTransactionServer::run()
                             manager.queryProperties().serialize(mb);
                             break;
                         }
+                        case DAMP_SDSCMD_UPDTENV:
+                        {
+                            Owned<IPropertyTree> newEnv = createPTree(mb);
+                            bool forceGroupUpdate;
+                            mb.read(forceGroupUpdate);
+                            StringBuffer response;
+                            bool result = manager.updateEnvironment(newEnv, forceGroupUpdate, response);
+                            mb.clear().append(DAMP_SDSREPLY_OK).append(result).append(response);
+                            break;
+                        }
                         default:
                             throw MakeSDSException(SDSExcpt_UnrecognisedCommand, "%d", action);
                     }
@@ -5272,7 +5284,7 @@ public:
         {
             unsigned crc = 0;
             StringBuffer tmpStoreName;
-            OwnedIFileIO iFileIOTmpStore = createUniqueFile(location, TMPSAVENAME, tmpStoreName);
+            OwnedIFileIO iFileIOTmpStore = createUniqueFile(location, TMPSAVENAME, NULL, tmpStoreName);
             OwnedIFile iFileTmpStore = createIFile(tmpStoreName);
             try
             {
@@ -7857,6 +7869,33 @@ StringBuffer &CCovenSDSManager::getUsageStats(StringBuffer &out)
     return out;
 }
 
+bool CCovenSDSManager::updateEnvironment(IPropertyTree *newEnv, bool forceGroupUpdate, StringBuffer &response)
+{
+    Owned<IRemoteConnection> conn = querySDS().connect("/",myProcessSession(),0, INFINITE);
+    if (conn)
+    {
+        Owned<IPropertyTree> root = conn->getRoot();
+        Owned<IPropertyTree> child = root->getPropTree("Environment");
+        if (child.get())
+        {
+            StringBuffer bakname;
+            Owned<IFileIO> io = createUniqueFile(NULL, "environment", "bak", bakname);
+            Owned<IFileIOStream> fstream = createBufferedIOStream(io);
+            toXML(child, *fstream);         // formatted (default)
+            root->removeTree(child);
+        }
+        root->addPropTree("Environment", LINK(newEnv));
+        root.clear();
+        conn->commit();
+        conn->close();
+        StringBuffer messages;
+        initClusterGroups(forceGroupUpdate, messages);
+        response.append(messages);
+        PROGLOG("Environment and node groups updated");
+    }
+    return true;
+}
+
 // TODO
 StringBuffer &CCovenSDSManager::getExternalReport(StringBuffer &out)
 {

+ 1 - 0
dali/base/dasds.hpp

@@ -103,6 +103,7 @@ interface ISDSManager
     virtual IPropertyTreeIterator *getElementsRaw(const char *xpath,INode *remotedali=NULL, unsigned timeout=MP_WAIT_FOREVER) = 0;
     virtual void setConfigOpt(const char *opt, const char *value) = 0;
     virtual unsigned queryCount(const char *xpath) = 0;
+    virtual bool updateEnvironment(IPropertyTree *newEnv, bool forceGroupUpdate, StringBuffer &response) = 0;
 };
 
 extern da_decl const char *queryNotifyHandlerName(IPropertyTree *tree);

+ 1 - 0
dali/base/dasds.ipp

@@ -67,6 +67,7 @@ enum SdsCommand { DAMP_SDSCMD_CONNECT, DAMP_SDSCMD_GET, DAMP_SDSCMD_GETCHILDREN,
                   DAMP_SDSCMD_VERSION, DAMP_SDSCMD_DIAGNOSTIC, DAMP_SDSCMD_GETELEMENTS, DAMP_SDSCMD_MCONNECT, DAMP_SDSCMD_GETCHILDREN2, DAMP_SDSCMD_GET2, DAMP_SDSCMD_GETPROPS,
                   DAMP_SDSCMD_GETXPATHS, DAMP_SDSCMD_GETEXTVALUE, DAMP_SDSCMD_GETXPATHSPLUSIDS, DAMP_SDSCMD_GETXPATHSCRITERIA, DAMP_SDSCMD_GETELEMENTSRAW,
                   DAMP_SDSCMD_GETCOUNT,
+                  DAMP_SDSCMD_UPDTENV,
                   DAMP_SDSCMD_MAX,
                   DAMP_SDSCMD_LAZYEXT=0x80000000
                 };

+ 10 - 26
dali/daliadmin/daliadmin.cpp

@@ -151,20 +151,6 @@ static const char *splitpath(const char *path,StringBuffer &head,StringBuffer &t
     return splitXPath(path, head);
 }
 
-static void getBackSuffix(StringBuffer &out)
-{
-    if (out.length())
-        out.append('_');
-    CDateTime dt;
-    dt.setNow();
-    dt.getString(out);
-    unsigned i;
-    for (i=0;i<out.length();i++)
-        if (out.charAt(i)==':')
-            out.setCharAt(i,'_');
-    out.append(".bak");
-}
-
 // NB: there's strtoll under Linux
 static unsigned __int64 hextoll(const char *str, bool &error)
 {
@@ -264,11 +250,9 @@ static void import(const char *path,const char *src,bool add)
         Owned<IRemoteConnection> bconn = querySDS().connect(remLeading(path),myProcessSession(),RTM_LOCK_READ|RTM_SUB, INFINITE);
         if (bconn) {
             Owned<IPropertyTree> broot = bconn->getRoot();
-            StringBuffer bakname(tail);
-            getBackSuffix(bakname);
+            StringBuffer bakname;
+            Owned<IFileIO> io = createUniqueFile(NULL, tail, "bak", bakname);
             OUTLOG("Saving backup of %s to %s",path,bakname.str());
-            Owned<IFile> f = createIFile(bakname.str());
-            Owned<IFileIO> io = f->open(IFOcreate);
             Owned<IFileIOStream> fstream = createBufferedIOStream(io);
             toXML(broot, *fstream);         // formatted (default)
         }
@@ -280,8 +264,8 @@ static void import(const char *path,const char *src,bool add)
     }
     Owned<IPropertyTree> root = conn->getRoot();
     if (!add) {
-      Owned<IPropertyTree> child = root->getPropTree(tail);
-      root->removeTree(child);
+        Owned<IPropertyTree> child = root->getPropTree(tail);
+        root->removeTree(child);
     }
     root->addPropTree(tail,LINK(branch));
     conn->commit();
@@ -291,7 +275,9 @@ static void import(const char *path,const char *src,bool add)
         path++;
     if (strcmp(path,"Environment")==0) {
         OUTLOG("Refreshing cluster groups from Environment");
-        initClusterGroups();
+        StringBuffer response;
+        if (!initClusterGroups(false, response))
+            WARNLOG("updating Environment via import path=%s, some groups clash and were not updated : %s", path, response.str());
     }
 }
 
@@ -318,10 +304,8 @@ static void _delete_(const char *path,bool backup)
     }
     if (backup) {
         StringBuffer bakname;
-        getBackSuffix(bakname);
+        Owned<IFileIO> io = createUniqueFile(NULL,"daliadmin", "bak", bakname);
         OUTLOG("Saving backup of %s/%s to %s",head.str(),tail,bakname.str());
-        Owned<IFile> f = createIFile(bakname.str());
-        Owned<IFileIO> io = f->open(IFOcreate);
         Owned<IFileIOStream> fstream = createBufferedIOStream(io);
         toXML(child, *fstream);         // formatted (default)
     }
@@ -625,7 +609,7 @@ static void dfsgroup(const char *name)
     StringBuffer eps;
     for (unsigned i=0;i<group->ordinality();i++) {
         group->queryNode(i).endpoint().getUrlStr(eps.clear());
-        OUTLOG("%d: %s",i,eps.str());
+        OUTLOG("%s",eps.str());
     }
 }
 
@@ -2276,7 +2260,7 @@ int main(int argc, char* argv[])
             }
             else if (stricmp(cmd,"dfsmap")==0) {
                 CHECKPARAMS(1,1);
-                dfsgroup(params.item(1));
+                dfsmap(params.item(1));
             }
             else if (stricmp(cmd,"dfsexist")==0) {
                 CHECKPARAMS(1,1);

+ 1 - 1
dali/dfu/dfuutil.cpp

@@ -737,7 +737,7 @@ public:
                     transaction->commit();
                 }
             }
-        } catch (IException *e) {
+        } catch (IException *) {
             // TODO: DFS transaction could take care of it
             if (newfile)
                 queryDistributedFileDirectory().removeEntry(superfname,user);

+ 1 - 1
dali/server/daserver.cpp

@@ -86,7 +86,7 @@ void AddServers(const char *auditdir)
     servers.append(*createDaliPublisherServer());
     servers.append(*createDaliSDSServer(serverConfig));
     servers.append(*createDaliNamedQueueServer());
-    servers.append(*createDaliDFSServer());
+    servers.append(*createDaliDFSServer(serverConfig));
     servers.append(*createDaliAuditServer(auditdir));
     servers.append(*createDaliDiagnosticsServer());
     // add new coven servers here

+ 6 - 77
dali/updtdalienv/updtdalienv.cpp

@@ -42,25 +42,11 @@
 static void usage(const char *exe)
 {
     printf("Update dali with environment.xml changes:\n");
-    printf("  %s <environment-xml-file>\n", exe);
-    printf("  %s <environment-xml-file> -i <dali-ip>\n", exe);
+    printf("  %s <environment-xml-file> [-i <dali-ip>] [-f]\n", exe);
     printf("Retrieve directory information:\n"); 
     printf("  %s <environment-xml-file> -d category component instance [-ip ip]\n", exe);
 }
 
-static void getBackSuffix(StringBuffer &out)
-{
-    out.append("environment_");
-    CDateTime dt;
-    dt.setNow();
-    dt.getString(out);
-    unsigned i;
-    for (i=0;i<out.length();i++)
-        if (out.charAt(i)==':')
-            out.setCharAt(i,'_');
-    out.append(".bak");
-}
-
 
 int main(int argc, char* argv[])
 {
@@ -71,6 +57,7 @@ int main(int argc, char* argv[])
         usage(argv[0]);
         return -1;
     }
+    bool forceGroupUpdate = false;
     StringBuffer filename;
     StringBuffer inst;
     StringBuffer dcat;
@@ -82,6 +69,9 @@ int main(int argc, char* argv[])
             if ((stricmp(argv[i],"-i")==0)&&(i+1<argc)) {
                 inst.append(argv[++i]);
             }
+            else if (0==stricmp(argv[i],"-f")) {
+                forceGroupUpdate = true;
+            }
             else if ((stricmp(argv[i],"-d")==0)&&(i+3<argc)) {
                 dcat.append(argv[++i]);
                 dcomp.append(argv[++i]);
@@ -142,69 +132,8 @@ int main(int argc, char* argv[])
             }
         }
         else {
-            Owned<IPropertyTreeIterator> dalis = env->getElements("Software/DaliServerProcess/Instance");
-            if (!dalis||!dalis->first()) {
-                fprintf(stderr,"Could not find DaliServerProcess in %s\n",argv[1]);
-                return 1;
-            }
-            SocketEndpoint daliep;
-            loop {
-                const char *ps = dalis->get().queryProp("@port");
-                unsigned port = ps?atoi(ps):0;
-                if (!port)
-                    port = DALI_SERVER_PORT;
-                daliep.set(dalis->get().queryProp("@netAddress"),port);
-                if (inst.length()) {
-                    SocketEndpoint testep;
-                    testep.set(inst.str(),DALI_SERVER_PORT);
-                    if (testep.equals(daliep))
-                        break;
-                    daliep.set(NULL,0);
-                }   
-                if (!dalis->next())
-                    break;
-                if (!daliep.isNull()) {
-                    fprintf(stderr,"Ambiguous DaliServerProcess instance in %s\n",argv[1]);
-                    return 1;
-                }
-            }
-            if (daliep.isNull()) {
-                fprintf(stderr,"Could not find DaliServerProcess instance in %s\n",argv[1]);
-                return 1;
-            }
-            SocketEndpointArray epa;
-            epa.append(daliep);
-            Owned<IGroup> group = createIGroup(epa);
-
-
-            initClientProcess(group, DCR_Util);
-            Owned<IRemoteConnection> conn = querySDS().connect("/",myProcessSession(),0, INFINITE);
-            if (conn) {
-                Owned<IPropertyTree> root = conn->getRoot();
-                Owned<IPropertyTree> child = root->getPropTree("Environment");
-                if (child.get()) {
-                    StringBuffer bakname;
-                    getBackSuffix(bakname);
-                    Owned<IFile> f = createIFile(bakname.str());
-                    Owned<IFileIO> io = f->open(IFOcreate);
-                    Owned<IFileIOStream> fstream = createBufferedIOStream(io);
-                    toXML(child, *fstream);         // formatted (default)
-                    root->removeTree(child);
-                }
-                root->addPropTree("Environment",env.getClear());
-                root.clear();
-                conn->commit();
-                conn->close();
-                initClusterGroups();
-                StringBuffer tmp;
-                printf("Environment and node groups updated in dali at %s",daliep.getUrlStr(tmp).str());
-            }
-            else {
-                fprintf(stderr,"Could not connect to /\n");
+            if (!updateDaliEnv(env, forceGroupUpdate, inst.str()))
                 ret = 1;
-            }
-
-            closedownClientProcess();
         }
     }
     catch (IException *e) {

+ 0 - 36
deployment/deploy/ThorDeploymentEngine.cpp

@@ -42,39 +42,3 @@ void CThorDeploymentEngine::check()
    if (!dali || !*dali )
       throw MakeStringException(0, "No dali server is defined for thor %s", m_process.queryProp("@name"));
 }
-
-
-//---------------------------------------------------------------------------
-// copyInstallFiles
-//---------------------------------------------------------------------------
-void CThorDeploymentEngine::copyInstallFiles(IPropertyTree& instanceNode, const char* destPath)
-{
-   // Copy install files
-   CDeploymentEngine::copyInstallFiles(instanceNode, destPath);
-
-   EnvMachineOS os = m_envDepEngine.lookupMachineOS(instanceNode);
-
-   if (!m_compare)
-      ensurePath(destPath);
-
-   // Create slaves and spares files
-   writeComputerFile("./ThorSlaveProcess", StringBuffer(destPath).append("slaves").str(), os);
-   writeComputerFile("./ThorSpareProcess", StringBuffer(destPath).append("spares").str(), os);
-}
-
-
-//---------------------------------------------------------------------------
-// writeComputerFile
-//---------------------------------------------------------------------------
-void CThorDeploymentEngine::writeComputerFile(const char* type, const char* filename, EnvMachineOS os/*=MachineOsUnknown*/)
-{
-   StringBuffer str;
-   Owned<IPropertyTreeIterator> iter = m_process.getElements(type);
-    for(iter->first(); iter->isValid(); iter->next())
-    {
-        StringAttr netAddress;
-      if (m_envDepEngine.lookupNetAddress(netAddress, iter->query().queryProp("@computer")).length() > 0)
-         str.appendf("%s\n", netAddress.get());
-    }
-   writeFile(filename, str.str(), os);
-}

+ 0 - 4
deployment/deploy/ThorDeploymentEngine.hpp

@@ -33,10 +33,6 @@ public:
 
 protected:
    void check();
-   virtual void copyInstallFiles(IPropertyTree& instanceNode, const char* destPath);
-
-private:
-   void writeComputerFile(const char* type, const char* filename, EnvMachineOS os=MachineOsUnknown);
 };
 //---------------------------------------------------------------------------
 #endif // THORDEPLOYMENTENGINE_HPP_INCL

+ 0 - 37
deployment/deploy/thorconfiggenengine.cpp

@@ -45,40 +45,3 @@ void CThorConfigGenEngine::check()
    if (!dali || !*dali )
       throw MakeStringException(0, "No dali server is defined for thor %s", m_process.queryProp("@name"));
 }
-
-
-//---------------------------------------------------------------------------
-// copyInstallFiles
-//---------------------------------------------------------------------------
-void CThorConfigGenEngine::copyInstallFiles(IPropertyTree& instanceNode, const char* destPath)
-{
-   // Copy install files
-   CDeploymentEngine::copyInstallFiles(instanceNode, destPath);
-
-   EnvMachineOS os = m_envDepEngine.lookupMachineOS(instanceNode);
-
-   if (!m_compare)
-      ensurePath(destPath);
-
-   StringBuffer sbDestDir(StringBuffer(destPath).append(m_process.queryProp("@name")).append(PATHSEPCHAR));
-   // Create slaves and spares files
-   writeComputerFile("./ThorSlaveProcess", StringBuffer(sbDestDir).append("slaves").str(), os);
-   writeComputerFile("./ThorSpareProcess", StringBuffer(sbDestDir).append("spares").str(), os);
-}
-
-
-//---------------------------------------------------------------------------
-// writeComputerFile
-//---------------------------------------------------------------------------
-void CThorConfigGenEngine::writeComputerFile(const char* type, const char* filename, EnvMachineOS os/*=MachineOsUnknown*/)
-{
-   StringBuffer str;
-   Owned<IPropertyTreeIterator> iter = m_process.getElements(type);
-    for(iter->first(); iter->isValid(); iter->next())
-    {
-        StringAttr netAddress;
-      if (m_envDepEngine.lookupNetAddress(netAddress, iter->query().queryProp("@computer")).length() > 0)
-         str.appendf("%s\n", netAddress.get());
-    }
-   writeFile(filename, str.str(), os);
-}

+ 0 - 4
deployment/deploy/thorconfiggenengine.hpp

@@ -35,10 +35,6 @@ public:
 
 protected:
    void check();
-   virtual void copyInstallFiles(IPropertyTree& instanceNode, const char* destPath);
-
-private:
-   void writeComputerFile(const char* type, const char* filename, EnvMachineOS os=MachineOsUnknown);
 };
 //---------------------------------------------------------------------------
 #endif // THORCONFIGGENENGINE_HPP_INCL

+ 39 - 129
deployment/deployutils/configenvhelper.cpp

@@ -70,17 +70,21 @@ bool CConfigEnvHelper::handleThorTopologyOp(const char* cmd, const char* xmlArg,
                 computers.push_back(pComputer);
         }
 
+        if (!strcmp(newType, "Master") && computers.size() != 1)
+          throw MakeStringException(-1, "Thor cannot have more than one master. Please choose one computer only!");
+
         int numNodes = 1;
         if (slavesPerNode && *slavesPerNode)
             numNodes = atoi(slavesPerNode);
 
         if (numNodes < 1)
             numNodes = 1;
+        pThor->setPropInt("@slavesPerNode", numNodes);
 
         if (!strcmp(newType, "Master"))
             retVal = this->AddNewNodes(pThor, XML_TAG_THORMASTERPROCESS, 0, computers, checkComps, skipExisting, usageList);
         else if (!strcmp(newType, "Slave"))
-            retVal = this->AddNewNodes(pThor, XML_TAG_THORSLAVEPROCESS, 0, computers, checkComps, skipExisting, usageList, numNodes);
+            retVal = this->AddNewNodes(pThor, XML_TAG_THORSLAVEPROCESS, 0, computers, checkComps, skipExisting, usageList);
         else if (!strcmp(newType, "Spare"))
             retVal = this->AddNewNodes(pThor, XML_TAG_THORSPAREPROCESS, 0, computers, checkComps, skipExisting, usageList);
 
@@ -104,53 +108,9 @@ bool CConfigEnvHelper::handleThorTopologyOp(const char* cmd, const char* xmlArg,
             if (pProcessNode)
                 pThor->removeTree(pProcessNode);
 
-            // Delete from topology node
-            IPropertyTree* pTopoNode = pThor->queryPropTree(XML_TAG_TOPOLOGY);
-
-            if (!strcmp(type, "Slave"))
-            {
-                Owned<IPropertyTreeIterator> iter = pThor->getElements(XML_TAG_TOPOLOGY"//"XML_TAG_NODE);
-
-                ForEach(*iter)
-                {
-                    // Get macthing process node
-                    const char* szProcess = iter->query().queryProp(XML_ATTR_PROCESS);
-                    IPropertyTree* pProcessNode = GetProcessNode(pThor, szProcess);
-                    if (!strcmp(pProcessNode->queryName(), XML_TAG_THORMASTERPROCESS))
-                    {
-                        xpath.clear().appendf(XML_TAG_NODE"/["XML_ATTR_PROCESS"='%s']", process);
-                        IPropertyTree* pNode = iter->query().queryPropTree(xpath.str());
-                        if (pNode)
-                            iter->query().removeTree(pNode);
-
-                        break;
-                    }
-                }
-            }
-            else
-            {
-                xpath.clear().appendf(XML_TAG_NODE"/["XML_ATTR_PROCESS"='%s']", process);
-                IPropertyTree* pNode = pTopoNode->queryPropTree(xpath.str());
-
-                //Remove all slaves from thor
-                if (pNode && !strcmp(type, "Master"))
-                {
-                    Owned<IPropertyTreeIterator> iter = pThor->getElements(XML_TAG_TOPOLOGY"//"XML_TAG_NODE);
-
-                    ForEach(*iter)
-                    {
-                        // Get macthing process node
-                        const char* szProcess = iter->query().queryProp(XML_ATTR_PROCESS);
-                        IPropertyTree* pProcessNode = GetProcessNode(pThor, szProcess);
-                        if (pProcessNode && !strcmp(pProcessNode->queryName(), XML_TAG_THORSLAVEPROCESS))
-                            pThor->removeTree(pProcessNode);
-                    }
-                }
-
-                if (pNode)
-                    pTopoNode->removeTree(pNode);
-
-            }
+            //Remove all slaves from thor
+            if (!strcmp(type, "Master"))
+              pThor->removeProp(XML_TAG_THORSLAVEPROCESS);
         }
 
         RenameThorInstances(pThor);
@@ -1191,31 +1151,33 @@ void CConfigEnvHelper::RemoveSlaves(IPropertyTree* pRoxie, bool bLegacySlaves/*=
 
 void CConfigEnvHelper::RenameThorInstances(IPropertyTree* pThor)
 {
-    // Iterate through topology nodes
     int nSlave = 1;
     int nSpare = 1;
-    Owned<IPropertyTreeIterator> iter = pThor->getElements(XML_TAG_TOPOLOGY"//"XML_TAG_NODE);
+    IPropertyTree* pMaster = pThor->queryPropTree(XML_TAG_THORMASTERPROCESS);
+    if (pMaster)
+      pMaster->setProp(XML_ATTR_NAME, "m1");
+
+    StringBuffer sName;
+
+    Owned<IPropertyTreeIterator> iter = pThor->getElements(XML_TAG_THORSLAVEPROCESS);
     for (iter->first(); iter->isValid(); iter->next())
     {
-        // Get macthing process node
-        const char* szProcess = iter->query().queryProp(XML_ATTR_PROCESS);
-        IPropertyTree* pProcessNode = GetProcessNode(pThor, szProcess);
-        if (pProcessNode)
-        {
-            StringBuffer sName;
-            const char* szTag = pProcessNode->queryName();
-            if (strcmp(szTag, XML_TAG_THORSLAVEPROCESS) == 0)
-                sName.appendf("s%d", nSlave++);
-            else if (strcmp(szTag, XML_TAG_THORSPAREPROCESS) == 0)
-                sName.appendf("spare%d", nSpare++);
-            else if (strcmp(szTag, XML_TAG_THORMASTERPROCESS) == 0)
-                sName = "m1";
-            else continue;         
-
-            setAttribute(pProcessNode, XML_ATTR_NAME, sName);
-            setAttribute(&iter->query(), XML_ATTR_PROCESS, sName);
-        }
+      sName.clear().appendf("s%d", nSlave++);
+      setAttribute(&iter->query(), XML_ATTR_NAME, sName);
     }
+
+    iter.setown(pThor->getElements(XML_TAG_THORSPAREPROCESS));
+    for (iter->first(); iter->isValid(); iter->next())
+    {
+      sName.clear().appendf("spare%d", nSpare++);
+      setAttribute(&iter->query(), XML_ATTR_NAME, sName);
+    }
+
+    //With thor dynamic range changes, we do not need thor topology section
+    IPropertyTree* pTopology = pThor->queryPropTree(XML_TAG_TOPOLOGY);
+
+    if (pTopology)
+      pThor->removeTree(pTopology);
 }
 
 //----------------------------------------------------------------------------
@@ -1298,7 +1260,7 @@ void CConfigEnvHelper::UpdateThorAttributes(IPropertyTree* pParentNode)
 //---------------------------------------------------------------------------
 //  AddNewNodes
 //---------------------------------------------------------------------------
-bool CConfigEnvHelper::AddNewNodes(IPropertyTree* pThor, const char* szType, int nPort, IPropertyTreePtrArray& computers, bool validate, bool skipExisting, StringBuffer& usageList, int slavesPerNode)
+bool CConfigEnvHelper::AddNewNodes(IPropertyTree* pThor, const char* szType, int nPort, IPropertyTreePtrArray& computers, bool validate, bool skipExisting, StringBuffer& usageList)
 {
     // Get parent node
     IPropertyTree* pParentNode = pThor;
@@ -1319,67 +1281,15 @@ bool CConfigEnvHelper::AddNewNodes(IPropertyTree* pThor, const char* szType, int
         if (skipExisting && !CheckTopologyComputerUse(computers[i], pThor, usageList))
             continue;
 
-        for (int j = 0; j < slavesPerNode; j++)
-        {
-            StringBuffer sName;
-            sName.appendf("temp%d", i + j + 1);
-
-            // Add process node
-            IPropertyTree* pProcessNode = createPTree(szType);
-            pProcessNode->addProp(XML_ATTR_NAME, sName);
-            pProcessNode->addProp(XML_ATTR_COMPUTER, computers[i]->queryProp(XML_ATTR_NAME));
-            if (nPort != 0) pProcessNode->addPropInt(XML_ATTR_PORT, nPort);
-                addNode(pProcessNode, pThor);
-
-            //some thor "Topology" have been known to have multiple (unused) "Node" children with the same name
-            //so reuse any unused Topology/Node
-            //
-            Owned<IPropertyTreeIterator> iter = pThor->getElements(XML_TAG_TOPOLOGY"//"XML_TAG_NODE);
-            IPropertyTree* pNode = NULL;
-
-            ForEach(*iter)
-            {
-                // Get macthing process node
-                const char* szProcess = iter->query().queryProp(XML_ATTR_PROCESS);
-                IPropertyTree* pProcessNode = GetProcessNode(pThor, szProcess);
-                if (!pProcessNode)
-                {
-                    pNode = &iter->query();
-                    pNode->setProp(XML_ATTR_PROCESS, sName);
-                    break;
-                }
-            }
-
-            if (!pNode)
-            {
-                // Add topology node
-                pNode = createPTree(XML_TAG_NODE);
-                pNode->addProp(XML_ATTR_PROCESS, sName);
-
-                IPropertyTree* pTopoNode = pThor->queryPropTree(XML_TAG_TOPOLOGY);
-                if (!pTopoNode)
-                    pTopoNode = pThor->addPropTree(XML_TAG_TOPOLOGY, createPTree());
+        StringBuffer sName;
+        sName.appendf("temp%d", i + 1);
 
-                if (!strcmp(szType, XML_TAG_THORSLAVEPROCESS))
-                {
-                    Owned<IPropertyTreeIterator> iter = pThor->getElements(XML_TAG_TOPOLOGY"//"XML_TAG_NODE);
-
-                    ForEach(*iter)
-                    {
-                        // Get macthing process node
-                        const char* szProcess = iter->query().queryProp(XML_ATTR_PROCESS);
-                        IPropertyTree* pProcessNode = GetProcessNode(pThor, szProcess);
-                        if (!strcmp(pProcessNode->queryName(), XML_TAG_THORMASTERPROCESS))
-                        {
-                            addNode(pNode, &iter->query());
-                            break;
-                        }
-                    }
-                }
-                else
-                    addNode(pNode, pTopoNode);
-            }
-        }
+        // Add process node
+        IPropertyTree* pProcessNode = createPTree(szType);
+        pProcessNode->addProp(XML_ATTR_NAME, sName);
+        pProcessNode->addProp(XML_ATTR_COMPUTER, computers[i]->queryProp(XML_ATTR_NAME));
+        if (nPort != 0) pProcessNode->addPropInt(XML_ATTR_PORT, nPort);
+            addNode(pProcessNode, pThor);
     }
 
     RenameThorInstances(pThor);

+ 1 - 1
deployment/deployutils/configenvhelper.hpp

@@ -74,7 +74,7 @@ private:
     void RemoveSlaves(IPropertyTree* pRoxie, bool bLegacySlaves/*=false*/);
     void RenameThorInstances(IPropertyTree* pThor);
     void UpdateThorAttributes(IPropertyTree* pParentNode);
-    bool AddNewNodes(IPropertyTree* pThor, const char* szType, int nPort, IPropertyTreePtrArray& computers, bool validate, bool skipExisting, StringBuffer& usageList, int slavesPerNode = 1);
+    bool AddNewNodes(IPropertyTree* pThor, const char* szType, int nPort, IPropertyTreePtrArray& computers, bool validate, bool skipExisting, StringBuffer& usageList);
     bool CheckTopologyComputerUse(IPropertyTree* pComputerNode, IPropertyTree* pParentNode, StringBuffer& usageList) const;
     IPropertyTree* GetProcessNode(IPropertyTree* pThor, const char* szProcess) const;
     Linked<IPropertyTree> m_pRoot;

+ 23 - 1
esp/files/scripts/configmgr/configmgr.js

@@ -2356,6 +2356,20 @@ function onMenuItemClickThorTopology(p_sType, p_aArgs, p_oValue) {
   var selRows = dt.getSelectedRows();
   var menuItemName = this.cfg.getProperty("text");
   var record = recSet.getRecord(selRows[0]);
+  var slavesPresent = false;
+  var recSetLen = recSet.getLength();
+  for (var i = 0; i < recSetLen; i++) {
+      var r = recSet.getRecord(i);
+      if (r.getData('process') === 'Slave') {
+        slavesPresent = true;
+        break;
+      }
+  }
+
+  var slavesPerNode = getAttrValFromArr(rows.Attributes, 'slavesPerNode');
+  if (slavesPerNode === "")
+    slavesPerNode = "1";
+
   var type;
   //use 'name' instead of '_key' for complex tables
   var recName = record.getData('name');
@@ -2368,7 +2382,7 @@ function onMenuItemClickThorTopology(p_sType, p_aArgs, p_oValue) {
     type = "Master";
   else if (menuItemName === "Add Spares...")
     type = "Spare";
-  top.document.navDT.promptThorTopology(top.document.navDT, type);
+  top.document.navDT.promptThorTopology(top.document.navDT, type, slavesPresent, slavesPerNode);
 }
 function onContextMenuBeforeShow(p_sType, p_aArgs) {
   if (!this.configContextMenuItems) {
@@ -3150,6 +3164,14 @@ function isPresentInArr(arr, val) {
   return false;
 }
 
+function getAttrValFromArr(arr, attr) {
+  for (idx = 0; idx < arr.length; idx++)
+    if (arr[idx]['name'] && arr[idx]['value'] && arr[idx]['name'] === attr)
+    return arr[idx]['value'];
+
+  return "";
+}
+
 //only to be called from unload page
 function handleunload(fromUnload) {
   if (top.window.frames["DisplaySettingsFrame"].event && 

+ 10 - 4
esp/files/scripts/configmgr/navtree.js

@@ -2726,7 +2726,7 @@ function populateSelectComputersPanel(paramdt)
   getFileName(true) + 'Cmd=Farms');
 }
 
-function initSelectComputersPanel(paramdt, fnSave, enableNumNodes) {
+function initSelectComputersPanel(paramdt, fnSave, enableNumNodes, slavesPresent, slavesPerNode) {
   if (!paramdt.selectComputersPanel) {
     paramdt.selectComputersPanel = new YAHOO.widget.Dialog("selectComputersPanel",
                             { width: "520px",
@@ -2817,9 +2817,10 @@ function initSelectComputersPanel(paramdt, fnSave, enableNumNodes) {
       newtext.innerHTML = "Number of thor slaves per node(default 1): ";
       var aTextBox = document.createElement('input');
       aTextBox.type = 'text';
-      aTextBox.value = '1';
+      aTextBox.value = slavesPerNode;
       aTextBox.id = 'slavesPerNode';
       aTextBox.style.width = "50";
+      aTextBox.disabled = slavesPresent;
       newdiv.appendChild(newtext);
       newdiv.appendChild(aTextBox);
       var nodes = document.getElementById("selectComputersPanel").childNodes;
@@ -2840,6 +2841,11 @@ function initSelectComputersPanel(paramdt, fnSave, enableNumNodes) {
     if (document.getElementById('slavesPerNodeDiv'))
       document.getElementById('slavesPerNodeDiv').style.display = 'none';
   }
+
+  if (document.getElementById('slavesPerNode')) {
+    document.getElementById('slavesPerNode').disabled = slavesPresent;
+    document.getElementById('slavesPerNode').value = slavesPerNode;
+  }
 }
 
 function initReplaceRoxieNodesPanel() {
@@ -3069,7 +3075,7 @@ function thorInstSelToXML(self, selectedRows, paramdt, type, validateComputers,
   return xmlStr;
 }
 
-function promptThorTopology(self, type) {
+function promptThorTopology(self, type, slavesPresent, slavesPerNode) {
   var tmpdt = self;
   var handleSubmit = function() {
     var selRows = selectComputersDTDiv.selectComputersTable.getUserSelectedRows();
@@ -3180,7 +3186,7 @@ function promptThorTopology(self, type) {
     },
       getFileName(true) + 'Operation=Add&Type=' + type + '&XmlArgs=' + xmlStr);
   }
-  initSelectComputersPanel(tmpdt, handleSubmit, type==="Slave" ? true : false);
+  initSelectComputersPanel(tmpdt, handleSubmit, type==="Slave" ? true : false, slavesPresent, slavesPerNode);
   document.getElementById('selectComputersPanel').style.display = 'block';
   tmpdt.selectComputersPanel.render(document.body);
   tmpdt.selectComputersPanel.center();

+ 39 - 59
esp/services/WsDeploy/WsDeployService.cpp

@@ -59,63 +59,6 @@ bool supportedInEEOnly()
   throw MakeStringException(-1, "This operation is supported in Enterprise and above editions only. Please contact HPCC Systems at http://www.hpccsystems.com/contactus");
 }
 
-bool updateDaliEnv(IPropertyTree *env)
-{
-  if (!env)
-    return false;
-  const char *s = env->queryName();
-  if (!s||(strcmp(s, XML_TAG_ENVIRONMENT)!=0))
-    throw MakeStringException(-1,"Environment is invalid");
-  Owned<IPropertyTreeIterator> dalis = env->getElements("Software/DaliServerProcess/Instance");
-  if (!dalis||!dalis->first())
-    throw MakeStringException(-1,"Could not find DaliServerProcess in Environment");
-  SocketEndpoint daliep;
-  const char *ps = dalis->get().queryProp("@port");
-  unsigned port = ps?atoi(ps):0;
-  if (!port)
-    port = DALI_SERVER_PORT;
-  daliep.set(dalis->get().queryProp(XML_ATTR_NETADDRESS),port);
-  if (dalis->next())
-    throw MakeStringException(-1,"Ambiguous DaliServerProcess in Environment");
-  if (daliep.isNull())
-    throw MakeStringException(-1,"Could not find DaliServerProcess instance in Environment");
-  SocketEndpointArray epa;
-  epa.append(daliep);
-  Owned<IGroup> group = createIGroup(epa);
-
-  bool ret = initClientProcess(group, DCR_Util, 0, 0, 0, 1000);
-
-  if (!ret)
-  {
-    closedownClientProcess();
-    return ret;
-  }
-
-  try {
-    Owned<IRemoteConnection> conn = querySDS().connect("/",myProcessSession(),0, INFINITE);
-    if (conn) {
-      Owned<IPropertyTree> root = conn->getRoot();
-      Owned<IPropertyTree> child = root->getPropTree(XML_TAG_ENVIRONMENT);
-      if (child.get())
-        root->removeTree(child);
-      root->addPropTree(XML_TAG_ENVIRONMENT,LINK(env));
-      root.clear();
-      conn->commit();
-      conn->close();
-      initClusterGroups();
-    }
-    else
-      throw MakeStringException(-1,"Could not connect to dali root");
-  }
-  catch (IException *) {
-    closedownClientProcess();
-    throw;
-  }
-  closedownClientProcess();
-
-  return true;
-}
-
 void substituteParameters(const IPropertyTree* pEnv, const char *xpath, IPropertyTree* pNode, StringBuffer& result) 
 {
   while (*xpath)
@@ -3145,6 +3088,41 @@ bool CWsDeployFileInfo::displaySettings(IEspContext &context, IEspDisplaySetting
       else if (!strcmp(pszCompType, XML_TAG_THORCLUSTER))
       {
         Owned<IPropertyTree> pSrcTree = createPTreeFromXMLString(xml);
+        IPropertyTree* pTopoNode = pSrcTree->queryPropTree(XML_TAG_TOPOLOGY);
+
+        if (!pTopoNode)
+        {
+          pTopoNode = pSrcTree->addPropTree(XML_TAG_TOPOLOGY, createPTree());
+          IPropertyTree* pMaster = pSrcTree->queryPropTree(XML_TAG_THORMASTERPROCESS);
+
+          if (pMaster)
+          {
+            IPropertyTree* pMasterNode = createPTree(XML_TAG_NODE);
+            pMasterNode->addProp(XML_ATTR_PROCESS, pMaster->queryProp(XML_ATTR_NAME));
+            pTopoNode->addPropTree(XML_TAG_NODE, pMasterNode);
+
+            Owned<IPropertyTreeIterator> iterSlaves = pSrcTree->getElements(XML_TAG_THORSLAVEPROCESS);
+
+            ForEach (*iterSlaves)
+            {
+              IPropertyTree* pSlave = &iterSlaves->query();
+              IPropertyTree* pNode = createPTree(XML_TAG_NODE);
+              pNode->addProp(XML_ATTR_PROCESS, pSlave->queryProp(XML_ATTR_NAME));
+              pMasterNode->addPropTree(XML_TAG_NODE, pNode);
+            }
+          }
+
+          Owned<IPropertyTreeIterator> iterSpares = pSrcTree->getElements(XML_TAG_THORSPAREPROCESS);
+
+          ForEach (*iterSpares)
+          {
+            IPropertyTree* pSpare = &iterSpares->query();
+            IPropertyTree* pNode = createPTree(XML_TAG_NODE);
+            pNode->addProp(XML_ATTR_PROCESS, pSpare->queryProp(XML_ATTR_NAME));
+            pTopoNode->addPropTree(XML_TAG_NODE, pNode);
+          }
+        }
+
         xpath.clear().append("Topology/Node");
         Owned<IPropertyTreeIterator> iterMNodes = pSrcTree->getElements(xpath.str());
 
@@ -5375,14 +5353,16 @@ void CWsDeployFileInfo::saveEnvironment(IEspContext* pContext, IConstWsDeployReq
     m_constEnvRdOnly.setown(factory->loadLocalEnvironment(sXML.str()));
 
     if (valerrs.length())
-      errMsg.appendf("Save operation was successful. However the following exceptions were raised.\n%s", valerrs.str());
+      errMsg.appendf("CWsDeployFileInfo::saveEnvironment:Save operation was successful. However the following exceptions were raised.\n%s", valerrs.str());
   }
   else
   {
     try
     {
       m_Environment->commit();
-      initClusterGroups();
+      StringBuffer response;
+      if (!initClusterGroups(false, response))
+        WARNLOG("CWsDeployFileInfo::saveEnvironment: some groups clash and were not updated : %s", response.str());
     }
     catch (IException* e)
     {

+ 1 - 1
initfiles/componentfiles/configxml/Environment.xsd

@@ -348,7 +348,7 @@
                                                 </xs:element>
                                                 <xs:element name="Topology">
                                                     <xs:annotation>
-                                                        <xs:documentation>Describes the Topology of the given thor cluster</xs:documentation>
+                                                        <xs:documentation>Describes the jjjTopology of the given thor cluster</xs:documentation>
                                                     </xs:annotation>
                                                     <xs:complexType>
                                                         <xs:sequence maxOccurs="unbounded">

+ 9 - 0
initfiles/componentfiles/configxml/dali.xsd

@@ -285,6 +285,15 @@
       </xs:annotation>
     </xs:attribute>
   </xs:attributeGroup>
+   <xs:attributeGroup name="DFS">
+    <xs:attribute name="forceGroupUpdate" use="optional" default="false">
+      <xs:annotation>
+        <xs:appinfo>
+          <tooltip>Force group updates on startup, if environment mismatch</tooltip>
+        </xs:appinfo>
+      </xs:annotation>
+    </xs:attribute>
+   </xs:attributeGroup>
    <xs:attributeGroup name="LDAP">
     <xs:attribute name="ldapServer" type="ldapServerType" use="optional">
       <xs:annotation>

+ 3 - 0
initfiles/componentfiles/configxml/dali.xsl

@@ -173,6 +173,9 @@
           </xsl:call-template>
         </xsl:attribute>
       </xsl:element>
+      <xsl:element name="DFS">
+        <xsl:copy-of select="@forceGroupUpdate"/>
+      </xsl:element>
       <xsl:element name="Coven">
         <xsl:attribute name="store">dalicoven.xml</xsl:attribute>
         <xsl:element name="Alerts">

+ 9 - 10
initfiles/componentfiles/configxml/setvars_linux.xsl

@@ -71,8 +71,16 @@ export THORSLAVEPORT=<xsl:value-of select="@slaveport"/>
 export THORSLAVEPORT=6600
     </xsl:otherwise>
 </xsl:choose>
+<xsl:if test="string(@localThorPortInc) != ''">
+export localthorportinc=<xsl:value-of select="@localThorPortInc"/>
+</xsl:if>
 export domain=<xsl:value-of select="$domainName"/>
-export thor=<xsl:value-of select="@slaves"/>
+<xsl:if test="string(@slavesPerNode) != ''">
+export slavespernode=<xsl:value-of select="@slavesPerNode"/>
+</xsl:if>
+<xsl:if test="string(@multiSlaves) != ''">
+export multislaves=<xsl:value-of select="@multiSlaves"/>
+</xsl:if>
 <xsl:if test="string($thoruser) != ''">
 export THORUSER=<xsl:value-of select="$thoruser"/>
 </xsl:if>
@@ -87,15 +95,6 @@ export DALISERVER=<xsl:call-template name="getDaliServers">
 <xsl:if test="string(@localThor) != ''">
 export localthor=<xsl:value-of select="@localThor"/>
 </xsl:if>
-<xsl:if test="string(@localThorPortBase) != ''">
-export localthorportbase=<xsl:value-of select="@localThorPortBase"/>
-</xsl:if>
-<xsl:if test="string(@localThorPortInc) != ''">
-export localthorportinc=<xsl:value-of select="@localThorPortInc"/>
-</xsl:if>
-<xsl:if test="string(@multiSlaves) != ''">
-export multislaves=<xsl:value-of select="@multiSlaves"/>
-</xsl:if>
 <xsl:if test="string(Storage/@breakoutLimit) != ''">
 export breakoutlimit=<xsl:value-of select="Storage/@breakoutLimit"/>
 </xsl:if>

+ 4 - 4
initfiles/componentfiles/configxml/thor.xsd.in

@@ -399,17 +399,17 @@
           </xs:appinfo>
         </xs:annotation>
       </xs:attribute>
-      <xs:attribute name="localThorPortInc" type="xs:nonNegativeInteger" use="optional" default="2000">
+      <xs:attribute name="slavesPerNode" type="xs:nonNegativeInteger" use="optional" default="1">
         <xs:annotation>
           <xs:appinfo>
-            <tooltip>Port increment between local thor slaves (when localThor set)</tooltip>
+            <tooltip>This allows multiple slaves to exist on each node</tooltip>
           </xs:appinfo>
         </xs:annotation>
       </xs:attribute>
-      <xs:attribute name="multiSlaves" type="xs:boolean" use="optional" default="false">
+      <xs:attribute name="localThorPortInc" type="xs:nonNegativeInteger" use="optional" default="2000">
         <xs:annotation>
           <xs:appinfo>
-            <tooltip>Generate different port number using slaveport above as base - this allows multiple slaves to exist on each node</tooltip>
+            <tooltip>Port increment between slaves on same node</tooltip>
           </xs:appinfo>
         </xs:annotation>
       </xs:attribute>

+ 0 - 3
initfiles/componentfiles/configxml/thor.xsl

@@ -108,9 +108,6 @@
       <xsl:attribute name="name">
         <xsl:value-of select="@name"/>
       </xsl:attribute> 
-      <xsl:attribute name="slaves">
-        <xsl:value-of select="count(ThorSlaveProcess)"/>
-      </xsl:attribute>
       <xsl:attribute name="nodeGroup">
         <xsl:choose>
           <xsl:when test="string(@nodeGroup) = ''">

+ 28 - 13
initfiles/componentfiles/thor/makethorgroup

@@ -17,21 +17,36 @@
 ################################################################################
 
 
-if [ -z "$localthorportbase" ]; then
-  export localthorportbase=$THORSLAVEPORT
-fi
 if [ -z "$localthorportinc" ]; then
-  export localthorportinc=2000
+  export localthorportinc=200
 fi
 rm -f thorgroup
-lastslave=nop
+
+function contains() {
+    local n=$#
+    local value=${!n}
+    for ((i=1;i < ${n};i++)) {
+        if [ "${!i}" == "${value}" ]; then
+            echo "${i}"
+            return 0
+        fi
+    }
+    echo "0"
+    return 1
+}
+
+declare -a ports_used
+declare -a hosts
 for slave in $(cat slaves); do
- if [ "$slave" != "$lastslave" ]; then
-  let "base = $localthorportbase";
- else
-  let "base += $localthorportinc";
- fi;
- echo $slave:$base >> thorgroup
- lastslave=$slave;
+    p=$(contains "${hosts[@]}" "${slave}")
+    if [ 0 == ${p} ]; then
+        echo "${slave}:${THORSLAVEPORT}" >> thorgroup
+        p=$((${#hosts[@]}+1))
+        ports[${p}]=${THORSLAVEPORT}
+        hosts=(${hosts[@]} $slave)
+    else
+        newport=$((${ports[${p}]}+${localthorportinc}))
+        echo "${slave}:${newport}" >> thorgroup
+        ports[${p}]=${newport}
+    fi
 done
-

+ 1 - 2
initfiles/componentfiles/thor/run_thor

@@ -34,7 +34,6 @@ echo $$ > run_thor.lck
 
 export SENTINEL="thor.sentinel"
 while [ 1 ]; do
-    export logpthtail="`date +%m_%d_%Y_%H_%M_%S`"  
     export logpth="$logdir/$logpthtail"
     $deploydir/start_slaves
 
@@ -69,5 +68,5 @@ while [ 1 ]; do
         echo $SENTINEL 'has been removed or thormaster did not fully start - script stopping'
         exit 0
     fi
+    export logpthtail="`date +%m_%d_%Y_%H_%M_%S`"
 done
-

+ 1 - 1
initfiles/componentfiles/thor/start_backupnode.in

@@ -55,7 +55,7 @@ if [ -z "$BACKUPNODE_DATA" ]; then
     exit 1
 fi
 if [ ! -r $INSTANCE_DIR/slaves ]; then
-    echo cannot read $INSTANCE_DIR/slaves 
+    echo cannot read $INSTANCE_DIR/slaves
     exit 1
 fi
 mkdir -p $BACKUPNODE_DATA

+ 2 - 1
initfiles/componentfiles/thor/start_slaves

@@ -32,7 +32,8 @@ if [ "$localthor" = "true" ]; then
         let "n += 1";
         done
 else
-        if [ "$multislaves" = "true" ]; then
+        ## multislaves for bkwd compat. with old environments
+        if [ ${slavespernode} -gt 1 ] || [ "$multislaves" = "true" ]; then
             let "n = 1";
             for slave in $(cat $instancedir/thorgroup); do
                 ip=${slave/:*/}

+ 22 - 9
initfiles/componentfiles/thor/start_thor

@@ -45,26 +45,39 @@ $deploydir/stop_thor $deploydir
 
 # ----------------------------
 
+ulimit -n 8192
+if [ `ulimit -n` -lt 8192 ]; then
+    echo 'ulimit -n failed, aborting start_thor (perhaps you are not logged is as super user?)'
+    exit 0
+fi
+
+if [ ! -z ${THORPRIMARY} ]; then
+    groupName=${THORPRIMARY}
+else
+    groupName=${THORNAME}
+fi
+daliadmin $DALISERVER dfsgroup ${groupName} > slaves
+errcode=$?
+if [ 0 != ${errcode} ]; then
+    echo 'failed to lookup dali group for $groupName'
+    exit 1
+fi
+
 ln -s -f $deploydir/thormaster${LCR} thormaster_$THORMASTERPORT
 
 if [ "$localthor" = "true" ]; then
     ln -s -f $deploydir/thorslave${LCR} thorslave_$THORSLAVEPORT
-        . $deploydir/makethorgroup
 fi
-if [ "$multislaves" = "true" ]; then
+## multislaves for bkwd compat. with old environments
+if [ ${slavespernode} -gt 1 ] || [ "$localthor" = "true" ] || [ "$multislaves" = "true" ]; then
     . $deploydir/makethorgroup
 fi
 
-ulimit -n 8192
-if [ `ulimit -n` -lt 8192 ]; then
-    echo 'ulimit -n failed, aborting start_thor (perhaps you are not logged is as super user?)'
-    exit 0
-fi
-
 ENV_DIR=`cat ${HPCC_CONFIG} | sed -n "/\[DEFAULT\]/,/\[/p" | grep "^configs=" | sed -e 's/^configs=//'`
 export logdir=`updtdalienv $ENV_DIR/environment.xml -d log thor $THORNAME`
 if [ -z "$logdir" ]; then
 export logdir="./start_logs"
 fi
 mkdir -p $logdir
-$deploydir/run_thor 1>>$logdir/start_thor_$logpthtail.log 2>>$logdir/start_thor_$logpthtail.log
+export logpthtail="`date +%m_%d_%Y_%H_%M_%S`"
+$deploydir/run_thor >> $logdir/start_thor_$logpthtail.log 2>&1

+ 0 - 6
initfiles/etc/DIR_NAME/environment.xml.in

@@ -1249,7 +1249,6 @@
                description="Thor process"
                localThor="true"
                monitorDaliFileServer="true"
-               multiSlaves="false"
                name="mythor"
                pluginsPath="${PLUGINS_PATH}"
                replicateAsync="false"
@@ -1267,11 +1266,6 @@
    <SwapNode/>
    <ThorMasterProcess computer="localhost" name="m1"/>
    <ThorSlaveProcess computer="localhost" name="s1"/>
-   <Topology>
-    <Node process="m1">
-     <Node process="s1"/>
-    </Node>
-   </Topology>
   </ThorCluster>
   <Topology build="${CPACK_RPM_PACKAGE_VERSION}_${CPACK_RPM_PACKAGE_RELEASE}" buildSet="topology" name="topology">
    <Cluster name="hthor" prefix="hthor">

+ 5 - 3
system/jlib/jfile.cpp

@@ -5152,7 +5152,7 @@ bool unmountDrive(const char *drv)
 
 }
 
-IFileIO *createUniqueFile(const char *dir, const char *prefix, StringBuffer &filename)
+IFileIO *createUniqueFile(const char *dir, const char *prefix, const char *ext, StringBuffer &filename)
 {
     CDateTime dt;
     dt.setNow();
@@ -5166,7 +5166,9 @@ IFileIO *createUniqueFile(const char *dir, const char *prefix, StringBuffer &fil
         filename.append(prefix);
     else
         filename.append("uniq");
-    filename.appendf("_%"I64F"x.%x.%x.tmp", (__int64)GetCurrentThreadId(), (unsigned)GetCurrentProcessId(), t);
+    if (!ext || !*ext)
+        ext = "tmp";
+    filename.appendf("_%"I64F"x.%x.%x.%s", (__int64)GetCurrentThreadId(), (unsigned)GetCurrentProcessId(), t, ext);
     OwnedIFile iFile = createIFile(filename.str());
     IFileIO *iFileIO = NULL;
     unsigned attempts = 5; // max attempts
@@ -5184,7 +5186,7 @@ IFileIO *createUniqueFile(const char *dir, const char *prefix, StringBuffer &fil
         if (0 == --attempts)
             break;
         t += getRandom();
-        filename.clear().appendf("uniq_%"I64F"x.%x.%x.tmp", (__int64)GetCurrentThreadId(), (unsigned)GetCurrentProcessId(), t);
+        filename.clear().appendf("uniq_%"I64F"x.%x.%x.%s", (__int64)GetCurrentThreadId(), (unsigned)GetCurrentProcessId(), t, ext);
         iFile.setown(createIFile(filename.str()));
     }
     return NULL;

+ 1 - 1
system/jlib/jfile.hpp

@@ -585,7 +585,7 @@ extern jlib_decl void extractBlobElements(const char * prefix, const RemoteFilen
 extern jlib_decl bool mountDrive(const char *drv,const RemoteFilename &rfn); // linux only currently
 extern jlib_decl bool unmountDrive(const char *drv); // linux only currently
 
-extern jlib_decl IFileIO *createUniqueFile(const char *dir, const char *prefix, StringBuffer &tmpName);
+extern jlib_decl IFileIO *createUniqueFile(const char *dir, const char *prefix, const char *ext, StringBuffer &tmpName);
 
 
 // used by remote copy