Преглед на файлове

HPCC-26569 Roxie should not report ready unless all channels available

Also handle explicitly closed servers differently from ones that we have not
heard from.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman преди 3 години
родител
ревизия
e3cefd3183
променени са 5 файла, в които са добавени 94 реда и са изтрити 20 реда
  1. 1 1
      roxie/ccd/ccd.hpp
  2. 2 2
      roxie/ccd/ccdmain.cpp
  3. 5 3
      roxie/topo/toposerver.cpp
  4. 84 14
      roxie/udplib/udptopo.cpp
  5. 2 0
      roxie/udplib/udptopo.hpp

+ 1 - 1
roxie/ccd/ccd.hpp

@@ -22,6 +22,7 @@
 #include "jsocket.hpp"
 #include "jptree.hpp"
 #include "udplib.hpp"
+#include "udptopo.hpp"
 #include "portlist.h"
 #include "thorsoapcall.hpp"
 #include "thorxmlwrite.hpp"
@@ -284,7 +285,6 @@ enum class SinkMode : byte
 
 // Global configuration info
 extern bool shuttingDown;
-extern unsigned numChannels;
 extern unsigned callbackRetries;
 extern unsigned callbackTimeout;
 extern unsigned lowTimeout;

+ 2 - 2
roxie/ccd/ccdmain.cpp

@@ -57,7 +57,6 @@
 //=================================================================================
 
 bool shuttingDown = false;
-unsigned numChannels;
 unsigned callbackRetries = 3;
 unsigned callbackTimeout = 5000;
 unsigned lowTimeout = 10000;
@@ -1438,8 +1437,9 @@ int CCD_API roxie_main(int argc, const char *argv[], const char * defaultYaml)
                 queryFileCache().startCacheReporter();
 #ifdef _CONTAINERIZED
                 publishTopology(traceLevel, myRoles);
-#endif
+#else
                 writeSentinelFile(sentinelFile);
+#endif
                 DBGLOG("Startup completed - LPT=%u APT=%u", queryNumLocalTrees(), queryNumAtomTrees());
                 DBGLOG("Waiting for queries");
                 if (pingInterval)

+ 5 - 3
roxie/topo/toposerver.cpp

@@ -152,7 +152,8 @@ void timeoutTopology()
         // If a server is missing a heartbeat for a while, we mark it as down. Queued packets for that server will get discarded, and
         // it will be sorted to the end of the priority list for agent requests
         // The timeout is different for server vs agent - for servers, we want to be sure it really is down, and there's no huge cost for waiting,
-        // while for agents we want to divert traffic away from it ASAP (so long as there are other destinations available
+        // while for agents we want to divert traffic away from it ASAP (so long as there are other destinations available)
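+        // Note that there's a difference between 'sick' (heartbeat timed out - instance gets set to 0 below) and 'shut down' (instance is (time_t) -1, which is left unchanged below)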
         if (now-lastSeen > timeout)
         {
             if (traceLevel)
@@ -165,7 +166,8 @@ void timeoutTopology()
                 it = topology.erase(it);
                 continue;
             }
-            it->second.instance = 0;  // By leaving the entry present but with instance=0, we will ensure that all clients get to see that the machine is no longer present
+            if (it->second.instance != (time_t) -1)
+                it->second.instance = 0;  // By leaving the entry present but with instance=0, we will ensure that all clients get to see that the machine is degraded
         }
         ++it;
     }
@@ -266,7 +268,7 @@ void doServer(ISocket *socket)
                         if (line[0]=='-')
                         {
                             line = line.substr(1);
-                            instance = 0;
+                            instance = (time_t) -1;
                         }
                         if (traceLevel >= 6)
                             DBGLOG("Adding entry %s instance %" I64F "u", line.c_str(), (__uint64) instance);

+ 84 - 14
roxie/udplib/udptopo.cpp

@@ -16,6 +16,7 @@
 ############################################################################## */
 
 #include "jmisc.hpp"
+#include "jfile.hpp"
 #include "udplib.hpp"
 #include "udptopo.hpp"
 #include "udpipmap.hpp"
@@ -126,6 +127,7 @@ public:
     virtual bool implementsChannel(unsigned channel) const override;
     virtual StringBuffer &report(StringBuffer &ret) const override;
     virtual time_t queryServerInstance(const SocketEndpoint &ep) const override;
+    virtual void updateStatus() const override;
 private:
     std::map<unsigned, SocketEndpointArray> agents;  // indexed by channel
     std::map<unsigned, SocketEndpointArray> servers; // indexed by port
@@ -141,6 +143,12 @@ private:
 };
 
 SocketEndpoint myAgentEP;
+unsigned numChannels;
+
+static bool isActive(time_t instance)  // True when instance is a usable value, i.e. neither of the two sentinel values tested below
+{
+    return instance != 0 && instance != time_t(-1);  // NOTE(review): presumably 0 = stopped responding and time_t(-1) = explicitly closed - confirm against toposerver
+}
 
 CTopologyServer::CTopologyServer()
 {
@@ -194,7 +202,7 @@ CTopologyServer::CTopologyServer(const char *topologyInfo, const ITopologyServer
             }
             if (streq(role, "agent"))
             {
-                if (instance || ep.equals(myAgentEP))
+                if (isActive(instance) || ep.equals(myAgentEP))
                 {
                     agents[channel].append(ep);
                     if (ep.equals(myAgentEP))
@@ -205,7 +213,7 @@ CTopologyServer::CTopologyServer(const char *topologyInfo, const ITopologyServer
                     }
                     agents[0].append(ep);
                 }
-                else
+                else if (!instance)
                 {
                     degradedAgents[channel].append(ep);
                 }
@@ -213,13 +221,13 @@ CTopologyServer::CTopologyServer(const char *topologyInfo, const ITopologyServer
             else if (streq(role, "server"))
             {
                 time_t oldInstance = old ? old->queryServerInstance(ep) : 0;
-                if (!instance || (oldInstance && oldInstance != instance))
+                if (!isActive(instance) || (isActive(oldInstance) && oldInstance != instance))
                 {
                     StringBuffer s;
                     DBGLOG("Deleting pending data for server %s which has terminated or restarted", ep.getUrlStr(s).str());
                     ROQ->abortPendingData(ep);
                 }
-                if (instance)
+                if (isActive(instance))
                 {
                     servers[ep.port].append(ep);
                     serverInstances[ep] = instance;
@@ -304,6 +312,9 @@ bool CTopologyServer::implementsChannel(unsigned channel) const
 
 StringBuffer &CTopologyServer::report(StringBuffer &ret) const
 {
+#ifdef _DEBUG
+//    ret.append(rawData).newline();
+#endif
     for (auto it = agents.begin(); it != agents.end(); it++)
     {
         if (it->second.length())
@@ -323,6 +334,54 @@ StringBuffer &CTopologyServer::report(StringBuffer &ret) const
     return ret;
 }
 
+void CTopologyServer::updateStatus() const
+{
+    // Set the k8s ready probe status according to whether we have at least one agent available per channel: the sentinel file is written when every channel 1..numChannels has an agent and removed otherwise, and the outcome is logged
+    unsigned unready = 0;  // number of channels that currently have no agents
+    StringBuffer report;  // comma-separated list of unready channels, with consecutive channels collapsed to "first-last"
+    unsigned rangeStart = 0;  // first channel of the run of unready channels being accumulated; 0 means no run is open (channels are numbered from 1)
+    for (unsigned channel=1; channel <= numChannels; channel++)
+    {
+        if (!queryAgents(channel).length())  // no agents listed for this channel
+        {
+            if (!rangeStart)
+                rangeStart = channel;  // open a new run of unready channels
+            unready++;
+        }
+        else
+        {
+            if (rangeStart)  // a run of unready channels ended at channel-1: add it to the report
+            {
+                if (report.length())
+                    report.append(',');
+                report.appendf("%u", rangeStart);
+                if (rangeStart != channel-1)  // run covers more than one channel, so append its last member too
+                    report.appendf("-%u", channel-1);
+            }
+            rangeStart = 0;
+        }
+    }
+    if (rangeStart)  // a run still open here extends to the last channel, so the loop never flushed it
+    {
+        if (report.length())
+            report.append(',');
+        report.appendf("%u", rangeStart);
+        if (rangeStart != numChannels)
+            report.appendf("-%u", numChannels);
+    }
+    Owned<IFile> sentinelFile = createSentinelTarget();
+    if (unready==0)
+    {
+        writeSentinelFile(sentinelFile);  // every channel has at least one agent
+        DBGLOG("TOPO: all channels ready");
+    }
+    else
+    {
+        removeSentinelFile(sentinelFile);  // at least one channel has no agent
+        DBGLOG("TOPO: %u channel%s not ready: %s", unready, unready==1 ? "" : "s", report.str());
+    }
+}
+
 const SocketEndpointArray CTopologyServer::nullArray;
 
 // Class TopologyManager (there is a single instance) handles interaction with topology servers
@@ -338,6 +397,7 @@ public:
     const ITopologyServer &getCurrent();
 
     bool update();
+    void setTraceLevel(unsigned _traceLevel) { traceLevel = _traceLevel; }
     unsigned numServers() const { return topoServers.length(); }
     void freeze(bool frozen);
 
@@ -351,6 +411,7 @@ private:
     const unsigned maxReasonableResponse = 32*32*1024;  // At ~ 32 bytes per entry, 1024 channels and 32-way redundancy that's a BIG cluster!
     StringBuffer md5;
     StringBuffer topoBuf;
+    unsigned traceLevel = 0;
     bool frozen = false;    // used for testing
 };
 
@@ -408,9 +469,23 @@ bool TopologyManager::update()
                                         md5.clear().append(eol-mem, mem);  // Note: includes '\n'
                                         Owned<const ITopologyServer> oldServer = &getCurrent();
                                         Owned<const ITopologyServer> newServer = new CTopologyServer(eol, oldServer);
-                                        SpinBlock b(lock);
-                                        currentTopology.swap(newServer);
+                                        {
+                                            SpinBlock b(lock);
+                                            currentTopology.swap(newServer);
+                                        }
                                         updated = true;
+                                        if (traceLevel)
+                                        {
+                                            DBGLOG("Topology information updated:");
+                                            StringBuffer s;
+                                            MLOG("%s", currentTopology->report(s).str());
+                                        }
+                                        currentTopology->updateStatus();
+                                    }
+                                    else
+                                    {
+                                        StringBuffer s;
+                                        DBGLOG("Unexpected response from topology server %s: %.*s", topoServers.item(idx), responseLen, mem);
                                     }
                                 }
                             }
@@ -529,19 +604,14 @@ extern UDPLIB_API void publishTopology(unsigned traceLevel, const std::vector<Ro
 {
     if (topologyManager.numServers())
     {
-        topoThread = std::thread([traceLevel, &myRoles]()
+        topologyManager.setTraceLevel(traceLevel);
+        topoThread = std::thread([&myRoles]()
         {
             topologyManager.update();
             unsigned waitTime = 1000;  // First time around we don't wait as long, so that system comes up faster
             while (!abortTopo.wait(waitTime))
             {
-                if (topologyManager.update() && traceLevel)
-                {
-                    DBGLOG("Topology information updated:");
-                    Owned<const ITopologyServer> c = getTopology();
-                    StringBuffer s;
-                    MLOG("%s", c->report(s).str());
-                }
+                topologyManager.update();
                 waitTime = heartbeatInterval;
             }
             topologyManager.closedown(myRoles);

+ 2 - 0
roxie/udplib/udptopo.hpp

@@ -69,6 +69,7 @@
 extern UDPLIB_API unsigned minIbytiDelay;
 extern UDPLIB_API unsigned initIbytiDelay;
 extern UDPLIB_API SocketEndpoint myAgentEP;
+extern UDPLIB_API unsigned numChannels;
 
 class UDPLIB_API ChannelInfo
 {
@@ -111,6 +112,7 @@ interface ITopologyServer : public IInterface
     virtual bool implementsChannel(unsigned channel) const = 0;
     virtual StringBuffer & report(StringBuffer &ret) const = 0;
     virtual time_t queryServerInstance(const SocketEndpoint &ep) const = 0;
+    virtual void updateStatus() const = 0;
 };
 
 extern UDPLIB_API unsigned getNumAgents(unsigned channel);