Browse Source

HPCC-22221 hosts in groups

Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
Jake Smith 6 years ago
parent
commit
2434575bba

+ 32 - 42
common/environment/environment.cpp

@@ -168,7 +168,7 @@ private:
     // NOTE - order is important - we need to construct before p and (especially) destruct after p
     Owned<IRemoteConnection> conn;
     Owned<IPropertyTree> p;
-    mutable MapStringToMyClass<IConstEnvBase> cache;
+    mutable MapStringToMyClass<IConstEnvBase> cache; // NB: map of 'MappingStringToIInterface' that Link's the added IConstEnvBase, and Release's on element removal.
     mutable Mutex safeCache;
     mutable bool dropZoneCacheBuilt;
     mutable bool machineCacheBuilt;
@@ -307,7 +307,7 @@ public:
     virtual IEnvironment& lock() const;
     virtual IConstDomainInfo * getDomain(const char * name) const;
     virtual IConstMachineInfo * getMachine(const char * name) const;
-    virtual IConstMachineInfo * getMachineByAddress(const char * machineIp) const;
+    virtual IConstMachineInfo * getMachineByAddress(const char * hostOrIP) const;
     virtual IConstMachineInfo * getMachineForLocalHost() const;
     virtual IConstDropZoneInfo * getDropZone(const char * name) const;
     virtual IConstInstanceInfo * getInstance(const char * type, const char * version, const char *domain) const;
@@ -462,8 +462,8 @@ public:
             { return c->getDomain(name); }
     virtual IConstMachineInfo * getMachine(const char * name) const
             { return c->getMachine(name); }
-    virtual IConstMachineInfo * getMachineByAddress(const char * machineIp) const
-            { return c->getMachineByAddress(machineIp); }
+    virtual IConstMachineInfo * getMachineByAddress(const char * hostOrIP) const
+            { return c->getMachineByAddress(hostOrIP); }
     virtual IConstMachineInfo * getMachineForLocalHost() const
             { return c->getMachineForLocalHost(); }
     virtual IConstDropZoneInfo * getDropZone(const char * name) const
@@ -1463,12 +1463,12 @@ void CLocalEnvironment::buildMachineCache() const
         Owned<IPropertyTreeIterator> it = p->getElements("Hardware/Computer");
         ForEach(*it)
         {
+            Owned<IConstEnvBase> cached = new CConstMachineInfo((CLocalEnvironment *) this, &it->query());
             const char *name = it->query().queryProp("@name");
             if (name)
             {
                 StringBuffer x("Hardware/Computer[@name=\"");
                 x.append(name).append("\"]");
-                Owned<IConstEnvBase> cached = new CConstMachineInfo((CLocalEnvironment *) this, &it->query());
                 cache.setValue(x.str(), cached);
             }
             const char * netAddress = it->query().queryProp("@netAddress");
@@ -1476,7 +1476,6 @@ void CLocalEnvironment::buildMachineCache() const
             {
                 StringBuffer x("Hardware/Computer[@netAddress=\"");
                 x.append(netAddress).append("\"]");
-                Owned<IConstEnvBase> cached = new CConstMachineInfo((CLocalEnvironment *) this, &it->query());
                 cache.setValue(x.str(), cached);
 
                 IpAddress ip;
@@ -1487,7 +1486,6 @@ void CLocalEnvironment::buildMachineCache() const
             numOfMachines++;
             StringBuffer x("Hardware/Computer[@id=\"");
             x.append(MACHINE_PREFIX).append(numOfMachines).append("\"]");
-            Owned<IConstEnvBase> cached = new CConstMachineInfo((CLocalEnvironment *) this, &it->query());
             cache.setValue(x.str(), cached);
         }
         machineCacheBuilt = true;
@@ -1589,53 +1587,45 @@ IConstMachineInfo * CLocalEnvironment::getMachine(const char * name) const
     return (CConstMachineInfo *) cached;
 }
 
-IConstMachineInfo * CLocalEnvironment::getMachineByAddress(const char * machineIp) const
+IConstMachineInfo * CLocalEnvironment::getMachineByAddress(const char * hostOrIP) const
 {
-    if (!machineIp)
+    if (isEmptyString(hostOrIP))
         return nullptr;
     buildMachineCache();
-    Owned<IPropertyTreeIterator> iter;
-    StringBuffer xpath;
-    xpath.appendf("Hardware/Computer[@netAddress=\"%s\"]", machineIp);
+
     synchronized procedure(safeCache);
-    IConstEnvBase *cached = getCache(xpath.str());
-    if (!cached)
+
+    VStringBuffer xpath("Hardware/Computer[@netAddress=\"%s\"]", hostOrIP);
+    IConstEnvBase *cached = getCache(hostOrIP);
+    if (cached)
+        return (CConstMachineInfo *) cached;
+
+    IPropertyTree *d = p->queryPropTree(xpath); // exact match
+    if (!d && !isIPAddress(xpath)) // if not found and not an IP, resolve and match against resolved entries
     {
-        IPropertyTree *d = p->queryPropTree(xpath.str());
-        if (!d)
+        IpAddress ip(hostOrIP);
+        Owned<IPropertyTreeIterator> iter = p->getElements("Hardware/Computer");
+        ForEach(*iter)
         {
-            // I suspect not in the original spirit of this but look for resolved IP
-            Owned<IPropertyTreeIterator> iter = p->getElements("Hardware/Computer");
-            IpAddress ip;
-            ip.ipset(machineIp);
-            ForEach(*iter)
+            IPropertyTree &computer = iter->query();
+            IpAddress computerIP;
+            const char *computerNetAddress = computer.queryProp("@netAddress");
+            if (!isEmptyString(computerNetAddress))
             {
-                IPropertyTree &computer = iter->query();
-                IpAddress ip2;
-                const char *ips = computer.queryProp("@netAddress");
-                if (ips&&*ips)
+                // NB: could 1st check if computerNetAddress isIPAddress() and not bother resolving here if it is.
+                computerIP.ipset(computerNetAddress);
+                if (ip.ipequals(computerIP))
                 {
-                    ip2.ipset(ips);
-                    if (ip.ipequals(ip2))
-                    {
-                        d = &computer;
-                        break;
-                    }
+                    d = &computer;
+                    break;
                 }
             }
         }
-        if (!d)
-            return nullptr;
-        StringBuffer xpath1;
-        xpath1.appendf("Hardware/Computer[@name=\"%s\"]", d->queryProp("@name"));
-        cached = getCache(xpath1.str());
-        if (!cached)
-        {
-            cached = new CConstMachineInfo((CLocalEnvironment *) this, d);
-            setCache(xpath1.str(), cached);
-            setCache(xpath.str(), cached);
-        }
     }
+    if (!d)
+        return nullptr;
+    cached = new CConstMachineInfo((CLocalEnvironment *) this, d);
+    setCache(xpath.str(), cached);
     return (CConstMachineInfo *) cached;
 }
 

+ 1 - 1
common/environment/environment.hpp

@@ -174,7 +174,7 @@ interface IConstEnvironment : extends IConstEnvBase
 {
     virtual IConstDomainInfo * getDomain(const char * name) const = 0;
     virtual IConstMachineInfo * getMachine(const char * name) const = 0;
-    virtual IConstMachineInfo * getMachineByAddress(const char * netaddress) const = 0;
+    virtual IConstMachineInfo * getMachineByAddress(const char * hostOrIP) const = 0;
     virtual IConstMachineInfo * getMachineForLocalHost() const = 0;
     virtual IConstDropZoneInfo * getDropZone(const char * name) const = 0;
     virtual IConstInstanceInfo * getInstance(const char * type, const char * version, const char * domain) const = 0;

File diff suppressed because it is too large
+ 511 - 341
dali/base/dadfs.cpp


+ 10 - 8
dali/base/dadfs.hpp

@@ -39,6 +39,8 @@
 #include "dafdesc.hpp"
 #include "seclib.hpp"
 
+#include <vector>
+
 typedef __int64 DistributedLockID;
 #define FOREIGN_DALI_TIMEOUT (1000*60*5)
 
@@ -710,16 +712,16 @@ interface INamedGroupIterator: extends IInterface
     virtual StringBuffer &getdir(StringBuffer &dir) = 0;
 };
 
-interface INamedGroupStore: implements IGroupResolver
+interface INamedGroupStore : extends IGroupResolver
 {
     virtual IGroup *lookup(const char *logicalgroupname) = 0;
     virtual INamedGroupIterator *getIterator() = 0;
     virtual INamedGroupIterator *getIterator(IGroup *match, bool exact=false) = 0;
-    virtual void add(const char *logicalgroupname,IGroup *group, bool cluster=false, const char *dir=NULL, GroupType groupType = grp_unknown) = 0;
+    virtual void add(const char *logicalgroupname, const std::vector<std::string> &hosts, bool cluster=false, const char *dir=NULL, GroupType groupType = grp_unknown) = 0;
+    virtual unsigned removeNode(const char *logicalgroupname, const char *nodeToRemove) = 0;
     virtual void remove(const char *logicalgroupname) = 0;
-    virtual bool find(IGroup *grp, StringBuffer &lname, bool add=false) = 0;
     virtual void addUnique(IGroup *group,StringBuffer &lname,const char *dir=NULL) = 0;
-    virtual void swapNode(const IpAddress &from, const IpAddress &to) = 0;
+    virtual void swapNode(const char *from, const char *to) = 0;
     virtual IGroup *lookup(const char *logicalgroupname, StringBuffer &dir, GroupType &groupType) = 0;
     virtual unsigned setDefaultTimeout(unsigned timems) = 0;     // sets default timeout for SDS connections and locking
     virtual unsigned setRemoteTimeout(unsigned timems) = 0;      // sets default timeout for remote SDS connections and locking
@@ -788,11 +790,11 @@ extern da_decl IDaliServer *createDaliDFSServer(IPropertyTree *config); // calle
 // to initialize clustergroups after clusters change in the environment
 extern da_decl void initClusterGroups(bool force, StringBuffer &response, IPropertyTree *oldEnvironment, unsigned timems=INFINITE);
 extern da_decl bool resetClusterGroup(const char *clusterName, const char *type, bool spares, StringBuffer &response, unsigned timems=INFINITE);
-extern da_decl bool addClusterSpares(const char *clusterName, const char *type, SocketEndpointArray &eps, StringBuffer &response, unsigned timems=INFINITE);
-extern da_decl bool removeClusterSpares(const char *clusterName, const char *type, SocketEndpointArray &eps, StringBuffer &response, unsigned timems=INFINITE);
+extern da_decl bool addClusterSpares(const char *clusterName, const char *type, const std::vector<std::string> &hosts, StringBuffer &response, unsigned timems=INFINITE);
+extern da_decl bool removeClusterSpares(const char *clusterName, const char *type, const std::vector<std::string> &hosts, StringBuffer &response, unsigned timems=INFINITE);
 // should poss. belong in lib workunit
-extern da_decl StringBuffer &getClusterGroupName(IPropertyTree &cluster, StringBuffer &groupName);
-extern da_decl StringBuffer &getClusterSpareGroupName(IPropertyTree &cluster, StringBuffer &groupName);
+extern da_decl StringBuffer &getClusterGroupName(const IPropertyTree &cluster, StringBuffer &groupName);
+extern da_decl StringBuffer &getClusterSpareGroupName(const IPropertyTree &cluster, StringBuffer &groupName);
 extern da_decl IGroup *getClusterNodeGroup(const char *clusterName, const char *type, unsigned timems=INFINITE); // returns the raw cluster group (as defined in the Cluster topology)
 extern da_decl IGroup *getClusterProcessNodeGroup(const char *clusterName, const char *type, unsigned timems=INFINITE); // returns the group of all processes of cluster group (i.e. cluster group * slavesPerNode)
 

+ 9 - 7
dali/dalidiag/CMakeLists.txt

@@ -29,13 +29,14 @@ set (    SRCS
     )
 
 include_directories ( 
-         ./../../common/remote 
-         ./../server 
-         ./../base 
-         ./../../system/mp 
-         ./../../system/include 
-         ./../../system/jlib
-         ./../../system/security/shared
+         ${HPCC_SOURCE_DIR}/common/remote 
+         ${HPCC_SOURCE_DIR}/common/environment 
+         ${HPCC_SOURCE_DIR}/dali/server 
+         ${HPCC_SOURCE_DIR}/dali/base 
+         ${HPCC_SOURCE_DIR}/system/mp 
+         ${HPCC_SOURCE_DIR}/system/include 
+         ${HPCC_SOURCE_DIR}/system/jlib
+         ${HPCC_SOURCE_DIR}/system/security/shared
     )
 
 ADD_DEFINITIONS( -D_CONSOLE )
@@ -47,6 +48,7 @@ target_link_libraries ( dalidiag
          mp 
          hrpc 
          remote 
+         environment 
          dalibase 
     )
 

+ 26 - 18
dali/dalidiag/dalidiag.cpp

@@ -30,6 +30,8 @@
 #include "jptree.hpp"
 #include "jlzw.hpp"
 
+#include "environment.hpp"
+
 static const char *cmds[] = { "locks", "sdsstats", "sdssubscribers", "connections", "threads", "mpqueue", "clients", "mpverify", "timeq", "cleanq",  "timesds", "build", "sdsfetch", "dirparts", "sdssize", "nodeinfo", "slavenode", "backuplist", "save", NULL };
 
 void usage(const char *exe)
@@ -299,24 +301,30 @@ void partInfo(const char *name,unsigned copy)
 
 void nodeInfo(const char *ip)
 {
-    Owned<IRemoteConnection> conn = querySDS().connect("/Environment", myProcessSession(), 0, INFINITE);
-    IPropertyTree* root = conn->queryRoot();
-    StringBuffer query("Hardware/Computer[@netAddress=\"");
-    query.append(ip).append("\"]");
-    Owned<IPropertyTree> machine = root->getPropTree(query.str());
-    if (machine) {
-        printf("Node:       %s\n",ip);
-        const char *name=machine->queryProp("@name");
-        printf("Name:       %s\n",name);
-        printf("State:      %s\n",machine->queryProp("@state"));
-        Owned<IPropertyTreeIterator> clusters= root->getElements("Software/ThorCluster");
-        ForEach(*clusters) {
-            query.clear().append("ThorSlaveProcess[@computer=\"").append(name).append("\"]");
-            IPropertyTree &cluster = clusters->query();
-            Owned<IPropertyTree> slave = cluster.getPropTree(query.str());
-            if (slave) {
-                printf("Cluster:    %s\n",cluster.queryProp("@name"));
-                printf("Id:         %s\n",slave->queryProp("@name"));
+    Owned<IEnvironmentFactory> factory = getEnvironmentFactory(false);
+    Owned<IConstEnvironment> env = factory->openEnvironment();
+
+    Owned<IConstMachineInfo> machine = env->getMachineByAddress(ip);
+    if (machine)
+    {
+        Owned<const IPropertyTree> machinePTree = &machine->getPTree();
+
+        printf("Node:       %s\n", ip);
+        const char *name = machinePTree->queryProp("@name");
+        printf("Name:       %s\n", name);
+        printf("State:      %s\n", machinePTree->queryProp("@state"));
+
+        Owned<const IPropertyTree> envPTree = &env->getPTree();
+        Owned<IPropertyTreeIterator> clusters = envPTree->getElements("Software/ThorCluster");
+        ForEach(*clusters)
+        {
+            const IPropertyTree &cluster = clusters->query();
+            VStringBuffer xpath("ThorSlaveProcess[@computer=\"%s\"]", name);
+            const IPropertyTree *slave = cluster.queryPropTree(xpath);
+            if (slave)
+            {
+                printf("Cluster:    %s\n", cluster.queryProp("@name"));
+                printf("Id:         %s\n", slave->queryProp("@name"));
             }
         }
     }

+ 5 - 5
dali/datest/dfuwutest.cpp

@@ -472,8 +472,7 @@ void testRoxieDest()
 
 void testRoxieCopies()
 {
-    Owned<IGroup> grp = createIGroup("10.173.10.81-90");
-    queryNamedGroupStore().add("__test_cluster_10",grp,true);
+    queryNamedGroupStore().add("__test_cluster_10", { "10.173.10.81-90" }, true);
     Owned<IDFUWorkUnitFactory> factory = getDFUWorkUnitFactory();
     for (unsigned i= 0; i<18;i++) {
         Owned<IDFUWorkUnit> wu = factory->createWorkUnit();
@@ -781,9 +780,10 @@ void testRepeatedFiles2(StringBuffer &wuid)
 static IGroup *getAuxGroup()
 {
     IGroup *auxgrp = queryNamedGroupStore().lookup("test_dummy_group");
-    if (!auxgrp) {
-        auxgrp = createIGroup("10.173.34.70-77");
-        queryNamedGroupStore().add("test_dummy_group",auxgrp,true,"/c$/dummydata");
+    if (!auxgrp)
+    {
+        queryNamedGroupStore().add("test_dummy_group", { "10.173.34.70-77" }, true, "/c$/dummydata");
+        auxgrp = queryNamedGroupStore().lookup("test_dummy_group");
     }
     return auxgrp;
 }

+ 32 - 34
dali/daunittest/dautdfs.cpp

@@ -103,46 +103,44 @@ public:
 protected:
     void testDFS() 
     { 
-        Owned<IGroup> grp = createIGroup("192.168.1.1");
-        dfsgroup->add(DFSUTGROUP "1", grp, true);
-        SocketEndpointArray epa;
+        dfsgroup->add(DFSUTGROUP "1", { "192.168.1.1" }, true);
+        std::vector<std::string> hosts;
         unsigned n;
         StringBuffer s;
-        for (n=0;n<7;n++) {
+        for (n=0;n<7;n++)
+        {
             s.clear().append("192.168.2.").append(n+1);
-            SocketEndpoint ep(s.str());
-            epa.append(ep);
+            hosts.push_back(s.str());
         }
-        grp.setown(createIGroup(epa)); 
-        dfsgroup->add(DFSUTGROUP "7", grp, true);
-        epa.kill();
-        for (n=0;n<400;n++) {
+        dfsgroup->add(DFSUTGROUP "7", hosts, true);
+        hosts.clear();
+        for (n=0;n<400;n++)
+        {
             s.clear().append("192.168.").append(n/256+3).append('.').append(n%256+1);
-            SocketEndpoint ep(s.str());
-            epa.append(ep);
+            hosts.push_back(s.str());
         }
-        grp.setown(createIGroup(epa)); 
-        dfsgroup->add(DFSUTGROUP "400", grp, true);
-        epa.kill();
-        for (n=0;n<400;n++) {
+        dfsgroup->add(DFSUTGROUP "400", hosts, true);
+        hosts.clear();
+        for (n=0;n<400;n++)
+        {
             s.clear().append("192.168.").append(n/256+5).append('.').append(n%256+1);
-            SocketEndpoint ep(s.str());
-            epa.append(ep);
+            hosts.push_back(s.str());
         }
-        grp.setown(createIGroup(epa)); 
-        dfsgroup->add(DFSUTGROUP "400b", grp, true);
-        grp.setown(dfsgroup->lookup(DFSUTGROUP "400"));
+        dfsgroup->add(DFSUTGROUP "400b", hosts, true);
+        hosts.clear();
+        Owned<IGroup> grp = dfsgroup->lookup(DFSUTGROUP "400");
         CPPUNIT_ASSERT(grp.get()!=NULL);
         CPPUNIT_ASSERT(grp->ordinality()==400);
-        epa.kill();
-        for (n=0;n<grp->ordinality();n++) {
+        for (n=0;n<grp->ordinality();n++)
+        {
             s.clear().append("192.168.").append(n/256+3).append('.').append(n%256+1);
             SocketEndpoint ep(s.str());
-            epa.append(ep);
-            CPPUNIT_ASSERT(epa.item(n).equals(grp->queryNode(n).endpoint()));
+            CPPUNIT_ASSERT(ep.equals(grp->queryNode(n).endpoint()));
         }
-        epa.kill();
-        for (n=0;n<grp->ordinality();n++) {
+
+        SocketEndpointArray epa;
+        for (n=0;n<grp->ordinality();n++)
+        {
             s.clear().append("192.168.").append(n/256+5).append('.').append(n%256+1);
             SocketEndpoint ep(s.str());
             epa.append(ep);
@@ -151,15 +149,15 @@ protected:
         CPPUNIT_ASSERT(dfsgroup->find(grp, s.clear()));     
         CPPUNIT_ASSERT(strcmp(DFSUTGROUP "400b",s.str())==0);
         epa.kill();
-        for (n=0;n<7;n++) {
+        grp.clear();
+
+        for (n=0;n<7;n++)
+        {
             s.clear().append("192.168.7.").append(n+1);
-            SocketEndpoint ep(s.str());
-            epa.append(ep);
+            hosts.push_back(s.str());
         }
-        grp.setown(createIGroup(epa)); 
-        dfsgroup->add(DFSUTGROUP "7b", grp, true, "/c$/altdir");
-        epa.kill();
-        grp.clear();
+        dfsgroup->add(DFSUTGROUP "7b", hosts, true, "/c$/altdir");
+        hosts.clear();
 
         ClusterPartDiskMapSpec mapping;
 

+ 1 - 1
ecl/eclagent/eclagent.cpp

@@ -3253,7 +3253,7 @@ IGroup *EclAgent::getHThorGroup(StringBuffer &out)
     }
     // this shouldn't happen but..
     DBGLOG("Adding group %s",mygroupname.str());
-    queryNamedGroupStore().add(mygroupname.str(),mygrp,true);
+    queryNamedGroupStore().add(mygroupname.str(), { GetCachedHostName() }, true);
     out.append(mygroupname);
     return queryNamedGroupStore().lookup(mygroupname.str());
 }

+ 5 - 1
esp/services/ws_dfu/ws_dfuService.cpp

@@ -6200,7 +6200,11 @@ static IGroup *getDFUFileIGroup(const char *clusterName, ClusterType clusterType
             throw MakeStringException(ECLWATCH_INVALID_INPUT, "getDFUFileIGroup: Failed to get ConfigurationDirectory: %s.", groupName.str());
 
         GroupType grpType = (clusterType == ThorLCRCluster) ? grp_thor : ((clusterType == HThorCluster) ? grp_hthor : grp_roxie);
-        queryNamedGroupStore().add(groupName.str(), group, false, defaultDir.str(), grpType);
+
+        std::vector<std::string> hosts;
+        ForEachItemIn(l, locations)
+            hosts.push_back(locations.item(l));
+        queryNamedGroupStore().add(groupName, hosts, false, defaultDir, grpType);
         ESPLOG(LogMin, "DFUFileIGroup %s added", groupName.str());
     }
     return group.getClear();

+ 8 - 5
fs/dafilesrv/dafilesrv.cpp

@@ -535,11 +535,14 @@ int main(int argc,char **argv)
                 rowServiceConfiguration = daFileSrv->queryProp("@rowServiceConfiguration");
 
             // any overrides by Instance definitions?
-            // NB: This won't work if netAddress is "." or if we start supporting hostnames there
-            StringBuffer ipStr;
-            queryHostIP().getIpText(ipStr);
-            VStringBuffer daFileSrvPath("Instance[@netAddress=\"%s\"]", ipStr.str());
-            IPropertyTree *dafileSrvInstance = daFileSrv->queryPropTree(daFileSrvPath);
+            IPropertyTree *dafileSrvInstance = nullptr;
+            Owned<IPropertyTreeIterator> iter = daFileSrv->getElements("Instance");
+            ForEach(*iter)
+            {
+                IpAddress instanceIP(iter->query().queryProp("@netAddress"));
+                if (instanceIP.ipequals(queryHostIP()))
+                    dafileSrvInstance = &iter->query();
+            }
             if (dafileSrvInstance)
             {
                 Owned<IPropertyTree> _dafileSrvInstance;

+ 4 - 3
roxie/ccd/ccdfile.cpp

@@ -2883,7 +2883,8 @@ private:
         if (!group)
             throw MakeStringException(0, "Unknown cluster %s while writing file %s",
                     cluster, dFile->queryLogicalName());
-        if (group->isMember())
+        rank_t r = group->rank();
+        if (RANK_NULL != r)
         {
             if (localCluster)
                 throw MakeStringException(0, "Cluster %s occupies node already specified while writing file %s",
@@ -2892,8 +2893,8 @@ private:
             SocketEndpoint me(0, myNode.getNodeAddress());
             eps.append(me);
             localCluster.setown(createIGroup(eps));
-            StringBuffer clusterName;
-            localClusterName.set(eps.getText(clusterName));
+            VStringBuffer clusterName("%s[%u]", cluster, r+1);
+            localClusterName.set(clusterName);
         }
         else
         {

+ 33 - 0
system/jlib/jsocket.cpp

@@ -6832,6 +6832,39 @@ int wait_write_multiple(UnsignedArray &socks,       //IN   sockets to be checked
     return wait_multiple(false, socks, timeoutMS, readySocks);
 }
 
+inline bool isIPV4Internal(const char *ip)
+{
+    struct sockaddr_in sa;
+    return 0 != inet_pton(AF_INET, ip, &sa.sin_addr);
+}
+
+inline bool isIPV6Internal(const char *ip)
+{
+    struct sockaddr_in6 sa;
+    return 0 != inet_pton(AF_INET6, ip, &sa.sin6_addr);
+}
+
+bool isIPV4(const char *ip)
+{
+    if (isEmptyString(ip))
+        return false;
+    return isIPV4Internal(ip);
+}
+
+bool isIPV6(const char *ip)
+{
+    if (isEmptyString(ip))
+        return false;
+    return isIPV6Internal(ip);
+}
+
+bool isIPAddress(const char *ip)
+{
+    if (isEmptyString(ip))
+        return false;
+    return isIPV4(ip) || isIPV6(ip);
+}
+
 
 class CWhiteListHandler : public CSimpleInterfaceOf<IWhiteListHandler>, implements IWhiteListWriter
 {

+ 6 - 2
system/jlib/jsocket.hpp

@@ -163,7 +163,8 @@ public:
         port = other.port;
         return *this;
     }
-	bool operator == (const SocketEndpoint &other) const { return equals(other); }
+    bool operator == (const SocketEndpoint &other) const { return equals(other); }
+    bool operator != (const SocketEndpoint &other) const { return !equals(other); }
 
     unsigned hash(unsigned prev) const;
     
@@ -639,6 +640,10 @@ extern jlib_decl int wait_write_multiple(UnsignedArray  &socks,     //IN   socke
 extern jlib_decl void throwJSocketException(int jsockErr);
 extern jlib_decl IJSOCK_Exception* createJSocketException(int jsockErr, const char *_msg);
 
+extern jlib_decl bool isIPV4(const char *ip);
+extern jlib_decl bool isIPV6(const char *ip);
+extern jlib_decl bool isIPAddress(const char *ip);
+
 interface IWhiteListHandler : extends IInterface
 {
     virtual bool isWhiteListed(const char *ip, unsigned __int64 role, StringBuffer *responseText=nullptr) const = 0;
@@ -656,6 +661,5 @@ typedef std::function<bool(IWhiteListWriter &)> WhiteListPopulateFunction;
 typedef std::function<StringBuffer &(StringBuffer &, unsigned __int64)> WhiteListFormatFunction;
 extern jlib_decl IWhiteListHandler *createWhiteListHandler(WhiteListPopulateFunction populateFunc, WhiteListFormatFunction roleFormatFunc = {}); // format function optional
 
-
 #endif
 

+ 1 - 1
system/mp/mpbase.hpp

@@ -92,7 +92,7 @@ interface IGroup: extends IInterface
 interface IGroupResolver: extends IInterface
 {
     virtual IGroup *lookup(const char *logicalgroupname) = 0;
-    virtual bool find(IGroup *grp,StringBuffer &name, bool add) = 0;
+    virtual bool find(IGroup *grp,StringBuffer &name, bool add=false) = 0;
 };
 
 interface INode: extends IInterface

+ 16 - 17
testing/unittests/dalitests.cpp

@@ -32,6 +32,8 @@
 #include "danqs.hpp"
 #include "dautils.hpp"
 
+#include <vector>
+
 #include "unittests.hpp"
 
 //#define COMPAT
@@ -506,8 +508,8 @@ public:
             file->attach("test::ftest" TN,user);
 #undef TN
         }
-        Owned<IGroup> grp3 = createIGroup("10.150.10.1,10.150.10.2,10.150.10.3");
-        queryNamedGroupStore().add("__testgroup3__",grp3,true);
+        queryNamedGroupStore().add("__testgroup3__", { "10.150.10.1", "10.150.10.2", "10.150.10.3" },true);
+        Owned<IGroup> grp3 = queryNamedGroupStore().lookup("test_dummy_group");
         {   // 3: three parts file old method
 #define TN "3"
             removeLogical("test::ftest" TN, user);
@@ -559,14 +561,14 @@ public:
         unsigned t;
         queryNamedGroupStore().remove("daregress_group");
         dir.removeEntry("daregress::superfile1", user);
-        SocketEndpointArray epa;
-        for (n=0;n<400;n++) {
+        std::vector<std::string> hosts;
+        for (n=0;n<400;n++)
+        {
             s.clear().append("192.168.").append(n/256).append('.').append(n%256);
-            SocketEndpoint ep(s.str());
-            epa.append(ep);
+            hosts.push_back(s.str());
         }
-        Owned<IGroup> group = createIGroup(epa);
-        queryNamedGroupStore().add("daregress_group",group,true);
+        queryNamedGroupStore().add("daregress_group", hosts, true);
+        Owned<IGroup> group = queryNamedGroupStore().lookup("daregress_group");
         ASSERT(queryNamedGroupStore().find(group,s.clear()) && "Created logical group not found");
         ASSERT(stricmp(s.str(),"daregress_group")==0 && "Created logical group found with wrong name");
         group.setown(queryNamedGroupStore().lookup("daregress_group"));
@@ -1733,21 +1735,18 @@ public:
     }
     void testMultiCluster()
     {
-        Owned<IGroup> grp1 = createIGroup("192.168.51.1-5");
-        Owned<IGroup> grp2 = createIGroup("192.168.16.1-5");
-        Owned<IGroup> grp3 = createIGroup("192.168.53.1-5");
-        queryNamedGroupStore().add("testgrp1",grp1);
-        queryNamedGroupStore().add("testgrp2",grp2);
-        queryNamedGroupStore().add("testgrp3",grp3);
+        queryNamedGroupStore().add("testgrp1", { "192.168.51.1-5" });
+        queryNamedGroupStore().add("testgrp2", { "192.168.16.1-5" });
+        queryNamedGroupStore().add("testgrp3", { "192.168.53.1-5" });
 
         Owned<IFileDescriptor> fdesc = createFileDescriptor();
         fdesc->setDefaultDir("/c$/thordata/test");
         fdesc->setPartMask("testfile1._$P$_of_$N$");
         fdesc->setNumParts(5);
         ClusterPartDiskMapSpec mapping;
-        fdesc->addCluster(grp1,mapping);
-        fdesc->addCluster(grp2,mapping);
-        fdesc->addCluster(grp3,mapping);
+        fdesc->addCluster("testgrp1", nullptr, mapping);
+        fdesc->addCluster("testgrp2", nullptr, mapping);
+        fdesc->addCluster("testgrp3", nullptr, mapping);
         removeLogical("test::testfile1", user);
         Owned<IDistributedFile> file = queryDistributedFileDirectory().createNew(fdesc);
         removeLogical("test::testfile1", user);

+ 16 - 6
tools/swapnode/swapnode.cpp

@@ -268,24 +268,34 @@ int main(int argc, const char *argv[])
                     case cmd_addspares:
                     case cmd_removespares:
                     {
+                        std::vector<std::string> hosts;
                         SocketEndpointArray allEps;
                         unsigned p=2;
                         do
                         {
-                            const char *ipOrRange = params.item(p);
+                            const char *hostOrIpRange = params.item(p);
                             SocketEndpointArray epa;
-                            epa.fromText(ipOrRange, 0);
-                            ForEachItemIn(e, epa)
-                                allEps.append(epa.item(e));
+                            epa.fromText(hostOrIpRange, 0);
+                            if (epa.ordinality()>1)
+                            {
+                                ForEachItemIn(e, epa)
+                                {
+                                    StringBuffer ipStr;
+                                    epa.item(e).getIpText(ipStr);
+                                    hosts.push_back(ipStr.str());
+                                }
+                            }
+                            else
+                                hosts.push_back(hostOrIpRange);
                             p++;
                         }
                         while (p<params.ordinality());
                         StringBuffer response;
                         bool res;
                         if (cmd == cmd_addspares)
-                            res = addClusterSpares(clusterName, "ThorCluster", allEps, response);
+                            res = addClusterSpares(clusterName, "ThorCluster", hosts, response);
                         else
-                            res = removeClusterSpares(clusterName, "ThorCluster", allEps, response);
+                            res = removeClusterSpares(clusterName, "ThorCluster", hosts, response);
                         if (!res)
                         {
                             WARNLOG("%s", response.str());

+ 27 - 25
tools/swapnode/swapnodelib.cpp

@@ -154,24 +154,26 @@ protected:
         }
         return info.getClear();
     }
-    bool doSwap(const char *oldip, const char *newip)
+    bool doSwap(const char *oldHost, const char *newHost)
     {
-        Owned<INode> newNode = createINode(newip);
-        Owned<INode> oldNode = createINode(oldip);
-        if (!group->isMember(oldNode)) {
-            ERRLOG("Node %s is not part of group %s", oldip, groupName.get());
+        Owned<INode> newNode = createINode(newHost);
+        Owned<INode> oldNode = createINode(oldHost);
+        if (!group->isMember(oldNode))
+        {
+            ERRLOG("Node %s is not part of group %s", oldHost, groupName.get());
             return false;
         }
-        if (group->isMember(newNode)) {
-            ERRLOG("Node %s is already part of group %s", newip, groupName.get());
+        if (group->isMember(newNode))
+        {
+            ERRLOG("Node %s is already part of group %s", newHost, groupName.get());
             return false;
         }
-        queryNamedGroupStore().swapNode(oldNode->endpoint(),newNode->endpoint());
+        queryNamedGroupStore().swapNode(oldHost, newHost);
         return true;
     }
-    bool doSingleSwapNode(const char *oldip,const char *newip,unsigned nodenum,IPropertyTree *info,const char *timechecked)
+    bool doSingleSwapNode(const char *oldHost, const char *newHost, unsigned nodenum, IPropertyTree *info, const char *timechecked)
     {
-        if (doSwap(oldip,newip)) {
+        if (doSwap(oldHost, newHost)) {
             if (info) {
                 StringBuffer times(timechecked);
                 if (times.length()==0) {
@@ -181,12 +183,12 @@ protected:
                 }
                 // TBD tie up with bad node in auto?
 
-                IPropertyTree *swap = info->addPropTree("Swap",createPTree("Swap"));
-                swap->setProp("@inNetAddress",newip);
-                swap->setProp("@outNetAddress",oldip);
-                swap->setProp("@time",times.str());
+                IPropertyTree *swap = info->addPropTree("Swap", createPTree("Swap"));
+                swap->setProp("@inNetAddress", newHost);
+                swap->setProp("@outNetAddress", oldHost);
+                swap->setProp("@time", times.str());
                 if (UINT_MAX != nodenum)
-                    swap->setPropInt("@rank",nodenum-1);
+                    swap->setPropInt("@rank", nodenum-1);
             }
             return true;
         }
@@ -503,23 +505,22 @@ public:
     CSingleSwapNode(const char *clusterName) : CSwapNode(clusterName)
     {
     }
-    bool swap(const char *oldip, const char *newip)
+    bool swap(const char *oldHost, const char *newHost)
     {
         ensureThorIsDown(clusterName,false,false);
 
         Owned<IPropertyTree> info = getSwapNodeInfo(true);
-        if (!doSingleSwapNode(oldip,newip,UINT_MAX,info,NULL))
+        if (!doSingleSwapNode(oldHost, newHost, UINT_MAX, info, NULL))
             return false;
         // check to see if it was a spare and remove
-        SocketEndpoint spareEp(newip);
+        SocketEndpoint spareEp(newHost);
         if (spareGroup)
         {
             rank_t r = spareGroup->rank(spareEp);
             if (RANK_NULL != r)
             {
-                PROGLOG("Removing spare : %s", newip);
-                spareGroup.setown(spareGroup->remove(r));
-                queryNamedGroupStore().add(spareGroupName, spareGroup); // NB: replace
+                PROGLOG("Removing spare : %s", newHost);
+                queryNamedGroupStore().removeNode(spareGroupName, newHost);
             }
         }
         info.clear();
@@ -530,11 +531,11 @@ public:
     }
 };
 
-bool swapNode(const char *cluster, const char *oldip, const char *newip)
+bool swapNode(const char *cluster, const char *oldHost, const char *newHost)
 {
-    PROGLOG("SWAPNODE(%s,%s,%s) starting",cluster,oldip,newip);
+    PROGLOG("SWAPNODE(%s,%s,%s) starting",cluster, oldHost, newHost);
     CSingleSwapNode swapNode(cluster);
-    return swapNode.swap(oldip, newip);
+    return swapNode.swap(oldHost, newHost);
 }
 
 
@@ -711,8 +712,9 @@ class CAutoSwapNode : public CSwapNode
             StringBuffer to;
             spareEp.getIpText(to);
             rank_t r = spareGroup->rank(spareEp);
+            dbgassertex(RANK_NULL != r);
             spareGroup.setown(spareGroup->remove(r));
-            queryNamedGroupStore().add(spareGroupName, spareGroup); // NB: replace
+            queryNamedGroupStore().removeNode(spareGroupName, to);
             Owned<IPropertyTree> info = getSwapNodeInfo(false);
             if (doSingleSwapNode(from.str(),to.str(),badrank.item(i4)+1,info,ts.str())) {
                 StringBuffer msg;

+ 1 - 1
tools/swapnode/swapnodelib.hpp

@@ -25,7 +25,7 @@
 #endif
 
 interface IPropertyTree;
-extern swapnodelib_decl bool swapNode(const char *cluster, const char *oldIP, const char *newIP);
+extern swapnodelib_decl bool swapNode(const char *cluster, const char *oldHost, const char *newHost);
 extern swapnodelib_decl void emailSwap(const char *cluster, const char *msg, bool warn=false, bool sendswapped=false, bool sendhistory=false);
 
 // Called from swapnode and thor