Browse Source

gh-2706 Add cli support for comparing state across roxie nodes

Add 'ecl roxie check <process_cluster>' which gathers state from the
roxie process cluster nodes and verifies that they all match.

Fixes gh-2706.

Signed-off-by: Anthony Fishbeck <Anthony.Fishbeck@lexisnexis.com>
Anthony Fishbeck 13 years ago
parent
commit
818aa2c4f6
4 changed files with 236 additions and 12 deletions
  1. 221 11
      ecl/eclcmd/roxie/ecl-roxie.cpp
  2. 2 1
      esp/scm/ws_smc.ecm
  3. 2 0
      esp/services/ws_smc/ws_smcService.cpp
  4. 11 0
      system/jlib/jstring.hpp

+ 221 - 11
ecl/eclcmd/roxie/ecl-roxie.cpp

@@ -35,6 +35,111 @@
 
 //=========================================================================================
 
+#define CHK_VERBOSE     0x0001
+#define CHK_SHOW_HASH   0x0002
+#define CHK_SHOW_ATTACH 0x0004
+
+#define CHK_SHOW_ALL (CHK_SHOW_HASH | CHK_SHOW_ATTACH)
+
+// Tallies ep into *attached or *detached according to the attachment flag it reports.
+// An endpoint whose attachment flag is null is not counted at all.
+inline void checkAttached(IConstRoxieControlEndpointInfo &ep, unsigned *attached, unsigned *detached)
+{
+    if (!ep.getAttached_isNull())
+    {
+        // Select the counter matching the flag; the caller may have passed NULL for it.
+        unsigned *tally = ep.getAttached() ? attached : detached;
+        if (tally)
+            ++*tally;
+    }
+}
+
+// Folds one endpoint into the caller's tallies and optionally prints a one-line summary of it.
+// notOk is bumped when strieq does not match the status against "ok"; the optional counters are
+// only updated when non-NULL. The line goes to stderr for a bad status, to stdout when CHK_VERBOSE
+// is set, and is suppressed otherwise; CHK_SHOW_HASH / CHK_SHOW_ATTACH add the matching columns.
+void checkEndpointInfoAndOuput(IConstRoxieControlEndpointInfo &ep, unsigned flags, unsigned &notOk, unsigned *noHash=NULL, unsigned *noAddress=NULL, unsigned *attached=NULL, unsigned *detached=NULL)
+{
+    const char *status = ep.getStatus();
+    const bool failed = !status || !strieq(status, "ok");
+    if (failed)
+        notOk++;
+
+    const char *hash = ep.getStateHash();
+    const char *address = ep.getAddress();
+    if (noHash && !(hash && *hash))
+        ++*noHash;
+    if (noAddress && !(address && *address))
+        ++*noAddress;
+    if (attached || detached)
+        checkAttached(ep, attached, detached);
+
+    // Failures are always reported; healthy endpoints only in verbose mode.
+    FILE *out = failed ? stderr : ((flags & CHK_VERBOSE) ? stdout : NULL);
+    if (!out)
+        return;
+
+    StringBuffer column;
+    fputs("  ", out);
+    fputs(column.set(address).padTo(21).append(' '), out);
+    if (flags & CHK_SHOW_HASH)
+        fputs(column.set(hash).padTo(20).append(' '), out);
+    if (!ep.getAttached_isNull() && (flags & CHK_SHOW_ATTACH))
+        fputs(ep.getAttached() ? "Attached " : "Detached ", out);
+    fputs((status && *status) ? status : "No-Status", out);
+    fputs("\n", out);
+}
+
+// Appends a self-closing <EndPoint .../> element describing ep to xml and returns xml.
+inline StringBuffer &endpointXML(IConstRoxieControlEndpointInfo &ep, StringBuffer &xml)
+{
+    appendXMLOpenTag(xml, "EndPoint", NULL, false); // complete=false: tag is closed by the "/>" below
+    appendXMLAttr(xml, "address", ep.getAddress());
+    if (!ep.getAttached_isNull()) // no "attached" attribute when the endpoint reported no attachment flag
+        appendXMLAttr(xml, "attached", ep.getAttached() ? "true" : "false");
+    appendXMLAttr(xml, "hash", ep.getStateHash());
+    return xml.append("/>");
+}
+
+// Prints the 'roxie check' summary; problems are written to stderr, healthy results and totals to stdout.
+void roxieStatusReport(IPropertyTree *hashTree, unsigned reporting, unsigned notOk, unsigned noHash, unsigned noAddress, unsigned attached, unsigned detached)
+{
+    if (notOk) // all counters are unsigned, so they are formatted with %u rather than %d
+        fprintf(stderr, "%u nodes had status != 'ok'\n", notOk);
+    if (noHash)
+        fprintf(stderr, "%u nodes had an empty hash\n", noHash);
+    if (noAddress)
+        fprintf(stderr, "%u nodes did not give an address\n", noAddress);
+
+    unsigned hashCount = hashTree->getCount("*"); // each direct child is treated as one distinct-hash group
+    if (0==hashCount)
+        fprintf(stderr, "No nodes reported a state hash\n");
+    else if (1==hashCount)
+        fprintf(stdout, "All nodes have matching state hash\n");
+    else
+        fprintf(stderr, "State hash mismatch\n");
+    Owned<IPropertyTreeIterator> hashGroups = hashTree->getElements("*");
+    ForEach(*hashGroups)
+    {
+        IPropertyTree &hashGroup = hashGroups->query();
+        fprintf(stdout, "  Hash [%s] - %u node(s)\n", hashGroup.queryName()+1, hashGroup.getCount("EndPoint")); // +1 skips the one-character name prefix ("H" in this file's caller)
+    }
+
+    if (attached && detached) // both counters non-zero: the nodes disagree about DALI attachment
+    {
+        fputs("Mismatched DALI Attachment\n", stderr);
+        fprintf(stderr, "  %u Node(s) attached to DALI\n", attached);
+        fprintf(stderr, "  %u Node(s) detached from DALI\n", detached);
+    }
+    else if (attached)
+        fputs("All nodes attached to DALI\n", stdout);
+    else if (detached)
+        fputs("All nodes detached from DALI\n", stdout);
+    else // neither counter was incremented, i.e. no endpoint supplied an attachment flag
+        fputs("No DALI attachment status reported\n", stderr);
+    fprintf(stdout, "%u Total node(s) reported\n", reporting);
+}
+
 class EclCmdRoxieAttach : public EclCmdCommon
 {
 public:
@@ -92,22 +197,19 @@ public:
         Owned<IClientRoxieControlCmdResponse> resp = client->RoxieControlCmd(req);
         if (resp->getExceptions().ordinality())
             outputMultiExceptions(resp->getExceptions());
+
         IArrayOf<IConstRoxieControlEndpointInfo> &endpoints = resp->getEndpoints();
-        bool failed = false;
+        unsigned reporting = endpoints.length();
+        unsigned notOk = 0;
+        unsigned flags = optVerbose ? CHK_VERBOSE : 0;
         ForEachItemIn(i, endpoints)
         {
             IConstRoxieControlEndpointInfo &ep = endpoints.item(i);
-            if (!ep.getStatus() || !strieq(ep.getStatus(), "ok"))
-            {
-                if (!failed)
-                    failed = true;
-                fprintf(stderr, "    %s - %s\n", ep.getAddress(), ep.getStatus() ? ep.getStatus() : "Unknown");
-            }
-            else if (optVerbose)
-                fprintf(stdout, "    %s - %s\n", ep.getAddress(), ep.getStatus());
+            checkEndpointInfoAndOuput(ep, flags, notOk);
         }
-        if (failed)
-            fprintf(stderr, "\nOne or more endpoints did not report status 'ok'\n");
+        if (notOk)
+            fprintf(stderr, "%d nodes had status != 'ok'\n", notOk);
+        fprintf(stdout, "%d Total node(s) reported\n", reporting);
         return 0;
     }
     virtual void usage()
@@ -147,6 +249,111 @@ private:
     bool attach;
 };
 
+class EclCmdRoxieCheck : public EclCmdCommon // 'ecl roxie check': requests state from a roxie process cluster and reports whether the nodes agree
+{
+public:
+    EclCmdRoxieCheck() : optMsToWait(10000) // default wait; passed to the request via setWait (usage() documents --wait as milliseconds)
+    {
+    }
+    virtual bool parseCommandLineOptions(ArgvIterator &iter) // returns false on an unexpected argument or option
+    {
+        for (; !iter.done(); iter.next())
+        {
+            const char *arg = iter.query();
+            if (*arg!='-') // anything not starting with '-' is a positional argument
+            {
+                if (optProcess.isEmpty()) // the first positional argument names the process cluster
+                    optProcess.set(arg);
+                else // a second positional argument is rejected
+                {
+                    fprintf(stderr, "\nunrecognized argument %s\n", arg);
+                    return false;
+                }
+                continue;
+            }
+            if (iter.matchOption(optMsToWait, ECLOPT_WAIT)) // ECLOPT_WAIT value is stored in optMsToWait
+                continue;
+            if (EclCmdCommon::matchCommandLineOption(iter, true)!=EclCmdOptionMatch) // other options are delegated to the base class; any other result aborts parsing
+                return false;
+        }
+        return true;
+    }
+    virtual bool finalizeOptions(IProperties *globals) // base-class finalization plus the mandatory process cluster check
+    {
+        if (!EclCmdCommon::finalizeOptions(globals))
+            return false;
+        if (optProcess.isEmpty()) // the process cluster argument is required
+        {
+            fputs("process cluster must be specified.\n\n", stderr);
+            return false;
+        }
+        return true;
+    }
+
+    virtual int processCMD() // sends the STATE control command and prints per-endpoint lines plus a summary
+    {
+        Owned<IClientWsSMC> client = createWsSMCClient();
+        VStringBuffer url("http://%s:%s/WsSMC", optServer.sget(), optPort.sget());
+        client->addServiceUrl(url.str());
+        if (optUsername.length()) // credentials are only set when a username was supplied
+            client->setUsernameToken(optUsername.get(), optPassword.sget(), NULL);
+
+        Owned<IClientRoxieControlCmdRequest> req = client->createRoxieControlCmdRequest();
+        req->setWait(optMsToWait);
+        req->setProcessCluster(optProcess);
+        req->setCommand(CRoxieControlCmd_STATE);
+
+        Owned<IClientRoxieControlCmdResponse> resp = client->RoxieControlCmd(req);
+        if (resp->getExceptions().ordinality()) // exceptions are printed, then processing continues with whatever endpoints were returned
+            outputMultiExceptions(resp->getExceptions());
+        IArrayOf<IConstRoxieControlEndpointInfo> &endpoints = resp->getEndpoints();
+
+        unsigned attached=0;
+        unsigned detached=0;
+        unsigned noHash=0;
+        unsigned noAddress=0;
+        unsigned notOk=0;
+
+        unsigned flags = CHK_SHOW_ALL; // always include the hash and attachment columns in per-endpoint output
+        if (optVerbose)
+            flags |= CHK_VERBOSE;
+
+        Owned<IPropertyTree> hashTree = createPTree(ipt_ordered); // groups endpoints under one child per distinct state hash
+        ForEachItemIn(i, endpoints)
+        {
+            IConstRoxieControlEndpointInfo &ep = endpoints.item(i);
+            checkEndpointInfoAndOuput(ep, flags, notOk, &noHash, &noAddress, &attached, &detached);
+
+            StringBuffer x("H"); // group name is "H" + hash; roxieStatusReport skips the prefix when printing — presumably keeps the name valid for any hash text, TODO confirm
+            IPropertyTree *hashItem = ensurePTree(hashTree, x.append(ep.getStateHash()));
+            hashItem->addPropTree("EndPoint", createPTreeFromXMLString(endpointXML(ep, x.clear()).str())); // x is reused as scratch space for the endpoint XML
+        }
+        roxieStatusReport(hashTree, endpoints.length(), notOk, noHash, noAddress, attached, detached);
+        return 0; // NOTE(review): 0 is returned even when a mismatch was reported — confirm this is intended for scripted use
+    }
+    virtual void usage() // prints command-specific help followed by the common options
+    {
+            fputs("\nUsage:\n"
+                "\n"
+                "The 'roxie check' command verifies that the state of all nodes in\n"
+                "the given roxie process cluster match.\n"
+                "\n"
+                "ecl roxie check <process_cluster>\n"
+                " Options:\n"
+                "   <process_cluster>      the roxie process cluster to check\n",
+                stdout);
+
+        fputs("\n"
+            "   --wait=<ms>            Max time to wait in milliseconds\n"
+            " Common Options:\n",
+            stdout);
+        EclCmdCommon::usage();
+    }
+private:
+    StringAttr optProcess; // name of the roxie process cluster to check (first positional argument)
+    unsigned optMsToWait; // value passed to the request's setWait; set from ECLOPT_WAIT
+};
+
 IEclCommand *createEclRoxieCommand(const char *cmdname)
 {
     if (!cmdname || !*cmdname)
@@ -155,6 +362,8 @@ IEclCommand *createEclRoxieCommand(const char *cmdname)
         return new EclCmdRoxieAttach(true);
     if (strieq(cmdname, "detach"))
         return new EclCmdRoxieAttach(false);
+    if (strieq(cmdname, "check"))
+        return new EclCmdRoxieCheck();
     return NULL;
 }
 
@@ -175,6 +384,7 @@ public:
             "   Queries Commands:\n"
             "      attach         (re)attach a roxie cluster from dali\n"
             "      detach         detach a roxie cluster from dali\n"
+            "      check          verify that roxie nodes have matching state\n"
         );
     }
 };

+ 2 - 1
esp/scm/ws_smc.ecm

@@ -263,6 +263,7 @@ ESPenum RoxieControlCmd : string
 {
     ATTACH("Attach"),
     DETACH("Detach"),
+    STATE("State"),
     RELOAD("Reload")
 };
 
@@ -273,7 +274,7 @@ ESPrequest RoxieControlCmdRequest
     int Wait;
 };
 
-ESPStruct RoxieControlEndpointInfo
+ESPStruct [nil_remove] RoxieControlEndpointInfo
 {
     string Address;
     bool Attached;

+ 2 - 0
esp/services/ws_smc/ws_smcService.cpp

@@ -1481,6 +1481,8 @@ inline const char *controlCmdMessage(int cmd)
         return "<control:lockDali/>";
     case CRoxieControlCmd_RELOAD:
         return "<control:reload/>";
+    case CRoxieControlCmd_STATE:
+        return "<control:state/>";
     default:
         throw MakeStringException(ECLWATCH_MISSING_PARAMS, "Unknown Roxie Control Command.");
     }

+ 11 - 0
system/jlib/jstring.hpp

@@ -382,6 +382,17 @@ inline StringBuffer &appendXMLTagName(StringBuffer &xml, const char *tag, const
 
 extern jlib_decl StringBuffer & appendXMLOpenTag(StringBuffer &xml, const char *tag, const char *prefix=NULL, bool complete=true, bool close=false, const char *uri=NULL);
 
+inline StringBuffer &appendXMLAttr(StringBuffer &xml, const char *name, const char *value, const char *prefix=NULL) // appends " name='value'" with value passed through encodeXML; returns xml
+{
+    if (name && *name && value) // nothing is appended for a NULL/empty name or a NULL value
+    {
+        appendXMLTagName(xml.append(' '), name, prefix);
+        encodeXML(value, xml.append("='"));
+        xml.append("'");
+    }
+    return xml;
+}
+
 inline StringBuffer & appendXMLCloseTag(StringBuffer &xml, const char *tag, const char *prefix=NULL)
 {
     if (!tag || !*tag)