فهرست منبع

Merge pull request #15488 from jakesmith/HPCC-26700-report-pod-skew

HPCC-26700 Report worker pod skew

Reviewed-by: Gavin Halliday <ghalliday@hpccsystems.com>
Merged-by: Gavin Halliday <ghalliday@hpccsystems.com>
Gavin Halliday 3 سال پیش
والد
کامیت
9f506ae0a2
6 فایل‌های تغییر یافته به همراه 131 افزوده شده و 2 حذف شده
  1. 53 0
      common/workunit/workunit.cpp
  2. 3 0
      common/workunit/workunit.hpp
  3. 1 0
      system/jlib/jstatcodes.h
  4. 18 1
      system/jlib/jstats.cpp
  5. 2 1
      system/jlib/jstats.h
  6. 54 0
      thorlcr/master/thgraphmanager.cpp

+ 53 - 0
common/workunit/workunit.cpp

@@ -14467,4 +14467,57 @@ std::pair<std::string, unsigned> getExternalService(const char *serviceName)
     return servicePair;
 }
 
+// Returns one {pod-name, node-name} vector per pod whose job-name label matches 'selector'.
+// Rows are nested vectors (rather than pairs) for extensibility, e.g. to add other meta fields.
+//
+// kubectl is asked (via a jsonpath template) to print "<pod>,<node>\n" for each matching pod,
+// and that text is parsed here. Every returned row is guaranteed to hold at least 2 non-empty
+// fields, so callers may index [0] (pod) and [1] (node) safely.
+//
+// Throws (via makeStringException/makeStringExceptionV) if:
+//  - the command produced no output, or only blank lines
+//  - a row has an empty pod name, or a missing/empty node name
+std::vector<std::vector<std::string>> getPodNodes(const char *selector)
+{
+    VStringBuffer getWorkerNodes("kubectl get pods --selector=job-name=%s \"--output=jsonpath={range .items[*]}{.metadata.name},{.spec.nodeName}{'\\n'}{end}\"", selector);
+    StringBuffer result;
+    runKubectlCommand("get-worker-nodes", getWorkerNodes, nullptr, &result);
+
+    if (result.isEmpty())
+        throw makeStringExceptionV(-1, "No worker nodes found for selector '%s'", selector);
+
+    const char *start = result.str();   // start of the field currently being scanned
+    const char *finger = start;         // scan position
+    std::vector<std::vector<std::string>> results;
+    std::vector<std::string> current;   // fields collected so far for the current row
+    while (true)
+    {
+        switch (*finger)
+        {
+            case ',':
+            {
+                // An empty field in front of a comma: the 1st field of a row is the pod name,
+                // any later field is (at present) the node name.
+                if (start == finger)
+                    throw makeStringException(-1, current.empty() ? "getPodNodes: Missing pod name(s) in output" : "getPodNodes: Missing node name(s) in output");
+                current.emplace_back(start, finger-start);
+                finger++;
+                start = finger;
+                break;
+            }
+            case '\n':
+            case '\0':
+            {
+                const bool atEnd = ('\0' == *finger);
+                if (start == finger)
+                {
+                    // "pod," followed directly by end-of-line: the node name is missing
+                    if (!current.empty())
+                        throw makeStringException(-1, "getPodNodes: Missing node name(s) in output");
+                    // Otherwise this is a blank line. The jsonpath template terminates every item
+                    // with '\n', so untrimmed output ends with one; tolerate it rather than
+                    // depending on the command helper having trimmed the text.
+                }
+                else
+                {
+                    current.emplace_back(start, finger-start);
+                    // a row without any comma has a pod name only
+                    if (current.size() < 2)
+                        throw makeStringException(-1, "getPodNodes: Missing node name(s) in output");
+                    results.emplace_back(std::move(current));
+                    current.clear(); // make the reuse of the moved-from row explicit
+                }
+                if (atEnd)
+                {
+                    // output consisted of blank lines only
+                    if (results.empty())
+                        throw makeStringExceptionV(-1, "No worker nodes found for selector '%s'", selector);
+                    return results;
+                }
+                finger++;
+                start = finger;
+                break;
+            }
+            default:
+            {
+                ++finger;
+                break;
+            }
+        }
+    }
+}
+
 #endif

+ 3 - 0
common/workunit/workunit.hpp

@@ -1768,6 +1768,9 @@ extern WORKUNIT_API void runK8sJob(const char *componentName, const char *wuid,
 
 // return the k8s external host and port for serviceName
 extern WORKUNIT_API std::pair<std::string, unsigned> getExternalService(const char *serviceName);
+
+// returns a vector of {pod-name, node-name} vectors, one per pod whose job-name matches selector
+extern WORKUNIT_API std::vector<std::vector<std::string>> getPodNodes(const char *selector);
 #endif
 
 #endif

+ 1 - 0
system/jlib/jstatcodes.h

@@ -241,6 +241,7 @@ enum StatisticKind
     StTimeAgentWait,
     StCycleAgentWaitCycles,
     StCostFileAccess,
+    StNumPods,
     StMax,
 
     //For any quantity there is potentially the following variants.

+ 18 - 1
system/jlib/jstats.cpp

@@ -916,7 +916,8 @@ static const StatisticMeta statsMetaData[StMax] = {
     { SIZESTAT(AgentReply) },
     { TIMESTAT(AgentWait) },
     { CYCLESTAT(AgentWait) },
-    { COSTSTAT(FileAccess) }
+    { COSTSTAT(FileAccess) },
+    { NUMSTAT(Pods) }
 };
 
 //Is a 0 value likely, and useful to be reported if it does happen to be zero?
@@ -2565,6 +2566,17 @@ void CRuntimeSummaryStatisticCollection::DerivedStats::setStatistic(unsigned __i
     sumSquares = dvalue * dvalue;
 }
 
+// Copies the recorded min/max values, and the node indexes they were recorded against,
+// into the out-parameters and returns the standard deviation of the recorded values.
+// The deviation is computed as sqrt(sumSquares/count - mean^2), i.e. the population form
+// (NOTE(review): assumes sum/sumSquares/count are the running totals kept by mergeStatistic - confirm).
+// Returns 0 if nothing has been recorded, or if the computed variance is not positive.
+double CRuntimeSummaryStatisticCollection::DerivedStats::queryStdDevInfo(unsigned __int64 &_min, unsigned __int64 &_max, unsigned &_minNode, unsigned &_maxNode) const
+{
+    _min = min;
+    _max = max;
+    _minNode = minNode;
+    _maxNode = maxNode;
+    // Nothing recorded: avoid dividing by zero, which could yield NaN (and NaN tests as "true")
+    if (!count)
+        return 0.0;
+    double mean = sum / count;
+    double variance = (sumSquares - sum * mean) / count;
+    // Floating point rounding can leave a tiny negative value when all samples are (nearly) equal;
+    // sqrt of a negative is NaN, so treat that case as no deviation.
+    if (variance <= 0.0)
+        return 0.0;
+    return sqrt(variance);
+}
+
 CRuntimeSummaryStatisticCollection::CRuntimeSummaryStatisticCollection(const StatisticsMapping & _mapping) : CRuntimeStatisticCollection(_mapping)
 {
     derived = new DerivedStats[ordinality()+1];
@@ -2594,6 +2606,11 @@ void CRuntimeSummaryStatisticCollection::setStatistic(StatisticKind kind, unsign
     derived[index].setStatistic(value, node);
 }
 
+// Looks up the derived (summary) stats slot that the mapping assigns to 'kind' and forwards
+// to it: fills in min/max and their node indexes, and returns the standard deviation.
+double CRuntimeSummaryStatisticCollection::queryStdDevInfo(StatisticKind kind, unsigned __int64 &_min, unsigned __int64 &_max, unsigned &_minNode, unsigned &_maxNode) const
+{
+    const auto slot = queryMapping().getIndex(kind);
+    const DerivedStats &kindStats = derived[slot];
+    return kindStats.queryStdDevInfo(_min, _max, _minNode, _maxNode);
+}
+
 static bool skewHasMeaning(StatisticKind kind)
 {
     //Check that skew makes any sense for the type of measurement

+ 2 - 1
system/jlib/jstats.h

@@ -652,13 +652,14 @@ public:
 
     void mergeStatistic(StatisticKind kind, unsigned __int64 value, unsigned node);
     void setStatistic(StatisticKind kind, unsigned __int64 value, unsigned node);
-
+    double queryStdDevInfo(StatisticKind kind, unsigned __int64 &_min, unsigned __int64 &_max, unsigned &_minNode, unsigned &_maxNode) const;
 protected:
     struct DerivedStats
     {
     public:
         void mergeStatistic(unsigned __int64 value, unsigned node);
         void setStatistic(unsigned __int64 value, unsigned node);
+        double queryStdDevInfo(unsigned __int64 &_min, unsigned __int64 &_max, unsigned &_minNode, unsigned &_maxNode) const;
     public:
         unsigned __int64 max = 0;
         unsigned __int64 min = 0;

+ 54 - 0
thorlcr/master/thgraphmanager.cpp

@@ -16,6 +16,7 @@
 ############################################################################## */
 
 #include "platform.h"
+#include <math.h>
 #include "jarray.hpp"
 #include "jfile.hpp"
 #include "jmutex.hpp"
@@ -1001,12 +1002,65 @@ bool CJobManager::executeGraph(IConstWorkUnit &workunit, const char *graphName,
         } cBlock(activeTasks);
 
         {
+#ifdef _CONTAINERIZED
+            double stdDev = 0.0;
+            unsigned __int64 min, max;
+            unsigned minNode, maxNode;
+            const StatisticsMapping podStatistics({StNumPods});
+            CRuntimeSummaryStatisticCollection podStats(podStatistics);
+            std::vector<std::string> nodeNames; // ordered list of the unique node names
+            try
+            {
+                // collate pod distribution
+                VStringBuffer selector("thorworker-%s-%s", wuid.get(), graphName);
+                std::vector<std::vector<std::string>> pods = getPodNodes(selector.toLowerCase());
+                std::unordered_map<std::string, unsigned> podPerNodeCounts;
+                for (const auto &podNode: pods)
+                {
+                    const std::string &node = podNode[1]; // pod is 1st item, node is 2nd
+                    podPerNodeCounts[node]++; // NB: if doesn't exist is created with default value of 0 1st
+                }
+                for (const auto &node: podPerNodeCounts)
+                {
+                    podStats.mergeStatistic(StNumPods, node.second, nodeNames.size());
+                    nodeNames.push_back(node.first);
+                }
+                stdDev = podStats.queryStdDevInfo(StNumPods, min, max, minNode, maxNode);
+            }
+            catch (IException *e)
+            {
+                EXCLOG(e);
+                e->Release();
+            }
+
+            // calculate the above, before locking the workunit below to avoid holding lock whilst issuing getPodNodes call
+#endif
+
             Owned<IWorkUnit> wu = &workunit.lock();
             wu->setStatistic(queryStatisticsComponentType(), queryStatisticsComponentName(), SSTgraph, graphScope, StWhenStarted, NULL, startTs, 1, 0, StatsMergeAppend);
             //Could use addTimeStamp(wu, SSTgraph, graphName, StWhenStarted, wfid) if start time could be this point
             wu->setState(WUStateRunning);
             VStringBuffer version("%d.%d", THOR_VERSION_MAJOR, THOR_VERSION_MINOR);
             wu->setDebugValue("ThorVersion", version.str(), true);
+
+#ifdef _CONTAINERIZED
+            // issue warning and publish pod distribution stats, if any stddev
+            if (stdDev)
+            {
+                Owned<IStatisticGatherer> collector = createGlobalStatisticGatherer(wu);
+                StatsScopeId wfidScopeId(SSTworkflow, wfid);
+                StatsScopeId graphScopeId(graphName);
+                collector->beginScope(wfidScopeId);
+                collector->beginScope(graphScopeId);
+                podStats.recordStatistics(*collector, false);
+
+                StringBuffer scopeStr;
+                wfidScopeId.getScopeText(scopeStr).append(':');
+                graphScopeId.getScopeText(scopeStr);
+                Owned<IException> e = makeStringExceptionV(-1, "%s: Degraded performance. Worker pods are unevenly distributed over nodes. StdDev=%.2f. min node(%s) has %" I64F "u pods, max node(%s) has %" I64F "u pods", scopeStr.str(), stdDev, nodeNames[minNode].c_str(), min, nodeNames[maxNode].c_str(), max);
+                reportExceptionToWorkunit(*wu, e);
+            }
+#endif
         }
 
         setWuid(workunit.queryWuid(), workunit.queryClusterName());