فهرست منبع

HPCC-16481 Persistent Thor Performance Monitor for UDP stats

Signed-off-by: Mark Kelly <mark.kelly@lexisnexis.com>
Mark Kelly 8 سال پیش
والد
کامیت
dae8ded528
5فایلهای تغییر یافته به همراه36 افزوده شده و 11 حذف شده
  1. 21 3
      system/jlib/jdebug.cpp
  2. 1 0
      system/jlib/jdebug.hpp
  3. 3 7
      thorlcr/graph/thgraph.cpp
  4. 5 1
      thorlcr/master/thmastermain.cpp
  5. 6 0
      thorlcr/slave/thslavemain.cpp

+ 21 - 3
system/jlib/jdebug.cpp

@@ -2305,7 +2305,7 @@ static class CMemoryUsageReporter: public Thread
     unsigned  interval;
     Semaphore sem;
     PerfMonMode traceMode;
-    IPerfMonHook * hook;
+    Linked<IPerfMonHook> hook;
     unsigned latestCPU;
 #if defined(USE_OLD_PU) || defined(_WIN32)
     double                         dbIdleTime;
@@ -2337,7 +2337,7 @@ public:
 
     {
         interval = _interval;
-        hook = _hook;
+        hook.set(_hook);
         term = false;
         latestCPU = 0;
 #ifdef _WIN32
@@ -2364,6 +2364,7 @@ public:
 
     void setPrimaryFileSystem(char const * _primaryfs)
     {
+        CriticalBlock block(sect);
         primaryfs.clear();
         if(_primaryfs)
             primaryfs.append(_primaryfs);
@@ -2371,6 +2372,7 @@ public:
 
     void setSecondaryFileSystem(char const * _secondaryfs)
     {
+        CriticalBlock block(sect);
         secondaryfs.clear();
         if(_secondaryfs)
             secondaryfs.append(_secondaryfs);
@@ -2640,7 +2642,10 @@ public:
                 if (traceMode&PerfMonExtended) {
                     if (extstats.getLine(str.clear()))
                         LOG(MCdebugInfo, unknownJob, "%s", str.str());
-                    extstats.printKLog(hook);
+                    {
+                        CriticalBlock block(sect);
+                        extstats.printKLog(hook);
+                    }
                 }
 #endif
             }
@@ -2660,6 +2665,13 @@ public:
     {
         return latestCPU;
     }
+
+    void setHook(IPerfMonHook *_hook)
+    {
+        CriticalBlock block(sect);
+        hook.set(_hook);
+    }
+
 } *MemoryUsageReporter=NULL;
 
 
@@ -2715,6 +2727,12 @@ void stopPerformanceMonitor()
     }
 }
 
+void setPerformanceMonitorHook(IPerfMonHook *hook)
+{
+    if (MemoryUsageReporter)
+        MemoryUsageReporter->setHook(hook);
+}
+
 void setPerformanceMonitorPrimaryFileSystem(char const * fs)
 {
     if(MemoryUsageReporter)

+ 1 - 0
system/jlib/jdebug.hpp

@@ -325,6 +325,7 @@ typedef unsigned PerfMonMode;
 void jlib_decl getSystemTraceInfo(StringBuffer &str, PerfMonMode mode = PerfMonProcMem);
 void jlib_decl startPerformanceMonitor(unsigned interval, PerfMonMode traceMode = PerfMonStandard, IPerfMonHook * hook = NULL);
 void jlib_decl stopPerformanceMonitor();
+void jlib_decl setPerformanceMonitorHook(IPerfMonHook *hook);
 void jlib_decl setPerformanceMonitorPrimaryFileSystem(char const * fs); // for monitoring disk1, defaults to C: (win) or / (linux)
 void jlib_decl setPerformanceMonitorSecondaryFileSystem(char const * fs); // for monitoring disk2, no default
 unsigned jlib_decl getLatestCPUUsage();

+ 3 - 7
thorlcr/graph/thgraph.cpp

@@ -2674,12 +2674,8 @@ void CJobBase::startJob()
 {
     LOG(MCdebugProgress, thorJob, "New Graph started : %s", graphName.get());
     ClearTempDirs();
-    unsigned pinterval = globals->getPropInt("@system_monitor_interval",1000*60);
-    if (pinterval)
-    {
-        perfmonhook.setown(createThorMemStatsPerfMonHook(*this, getOptInt(THOROPT_MAX_KERNLOG, 3)));
-        startPerformanceMonitor(pinterval,PerfMonStandard,perfmonhook);
-    }
+    perfmonhook.setown(createThorMemStatsPerfMonHook(*this, getOptInt(THOROPT_MAX_KERNLOG, 3)));
+    setPerformanceMonitorHook(perfmonhook);
     PrintMemoryStatusLog();
     logDiskSpace();
     unsigned keyNodeCacheMB = (unsigned)getWorkUnitValueInt("keyNodeCacheMB", DEFAULT_KEYNODECACHEMB * queryJobChannels());
@@ -2699,7 +2695,7 @@ void CJobBase::startJob()
 
 void CJobBase::endJob()
 {
-    stopPerformanceMonitor();
+    setPerformanceMonitorHook(nullptr);
     LOG(MCdebugProgress, thorJob, "Job ended : %s", graphName.get());
     clearKeyStoreCache(true);
     PrintMemoryStatusLog();

+ 5 - 1
thorlcr/master/thmastermain.cpp

@@ -549,7 +549,6 @@ int main( int argc, char *argv[]  )
 #endif
     const char *thorname = NULL;
     StringBuffer nodeGroup, logUrl;
-    Owned<IPerfMonHook> perfmonhook;
     unsigned channelsPerSlave = 1;
 
     ILogMsgHandler *logHandler;
@@ -814,6 +813,10 @@ int main( int argc, char *argv[]  )
 
             writeSentinelFile(sentinelFile);
 
+            unsigned pinterval = globals->getPropInt("@system_monitor_interval",1000*60);
+            if (pinterval)
+                startPerformanceMonitor(pinterval, PerfMonStandard, nullptr);
+
             thorMain(logHandler);
             LOG(daliAuditLogCat, ",Progress,Thor,Terminate,%s,%s,%s",thorname,nodeGroup.str(),queueName.str());
         }
@@ -826,6 +829,7 @@ int main( int argc, char *argv[]  )
         FLLOG(MCexception(e), thorJob, e,"ThorMaster");
         e->Release();
     }
+    stopPerformanceMonitor();
     disconnectLogMsgManagerFromDali();
     closeThorServerStatus();
     if (globals) globals->Release();

+ 6 - 0
thorlcr/slave/thslavemain.cpp

@@ -473,6 +473,11 @@ int main( int argc, char *argv[]  )
                 else
                     multiThorMemoryThreshold = 0;
             }
+
+            unsigned pinterval = globals->getPropInt("@system_monitor_interval",1000*60);
+            if (pinterval)
+                startPerformanceMonitor(pinterval, PerfMonStandard, nullptr);
+
             slaveMain(jobListenerStopped);
         }
 
@@ -484,6 +489,7 @@ int main( int argc, char *argv[]  )
             FLLOG(MCexception(e), thorJob, e,"ThorSlave");
         unregisterException.setown(e);
     }
+    stopPerformanceMonitor();
     ClearTempDirs();
 
     if (multiThorMemoryThreshold)