فهرست منبع

HPCC-16537 Keep track of the number of retries accessing remote files

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 8 سال پیش
والد
کامیت
f5acbc365d
4 فایلهای تغییر یافته به همراه 31 افزوده شده و 9 حذف شده
  1. 24 5
      common/remote/sockfile.cpp
  2. 1 0
      system/jlib/jstatcodes.h
  3. 5 4
      system/jlib/jstats.cpp
  4. 1 0
      thorlcr/graph/thgraphmaster.cpp

+ 24 - 5
common/remote/sockfile.cpp

@@ -2162,6 +2162,7 @@ protected:
     std::atomic<__uint64> ioWriteBytes;
     std::atomic<__uint64> ioReads;
     std::atomic<__uint64> ioWrites;
+    std::atomic<unsigned> ioRetries;
     IFOmode mode;
     compatIFSHmode compatmode;
     IFEflags extraFlags;
@@ -2169,8 +2170,7 @@ protected:
 public:
     IMPLEMENT_IINTERFACE
     CRemoteFileIO(CRemoteFile *_parent)
-        : parent(_parent), ioReadCycles(0), ioWriteCycles(0), ioReadBytes(0), ioWriteBytes(0), ioReads(0), ioWrites(0)
-
+        : parent(_parent), ioReadCycles(0), ioWriteCycles(0), ioReadBytes(0), ioWriteBytes(0), ioReads(0), ioWrites(0), ioRetries(0)
     {
         handle = 0;
         disconnectonexit = false;
@@ -2290,6 +2290,8 @@ public:
             return ioReads.load(std::memory_order_relaxed);
         case StNumDiskWrites:
             return ioWrites.load(std::memory_order_relaxed);
+        case StNumDiskRetries:
+            return ioRetries.load(std::memory_order_relaxed);
         }
         return 0;
     }
@@ -2352,14 +2354,22 @@ public:
             }
             catch (IJSOCK_Exception *e) {
                 EXCLOG(e,"CRemoteFileIO::read");
-                if (++tries>3)
+                if (++tries > 3)
+                {
+                    ioRetries.fetch_add(tries);
                     throw;
+                }
                 WARNLOG("Retrying read of %s (%d)",parent->queryLocalName(),tries);
                 Owned<IException> exc = e;
                 if (!reopen())
+                {
+                    ioRetries.fetch_add(tries);
                     throw exc.getClear();
+                }
             }
         }
+        if (tries)
+            ioRetries.fetch_add(tries);
         got = 0;
         return NULL;
     }
@@ -2383,15 +2393,24 @@ public:
             }
             catch (IJSOCK_Exception *e) {
                 EXCLOG(e,"CRemoteFileIO::write");
-                if (++tries>3)
+                if (++tries > 3)
+                {
+                    ioRetries.fetch_add(tries);
                     throw;
+                }
                 WARNLOG("Retrying write(%" I64F "d,%d) of %s (%d)",pos,len,parent->queryLocalName(),tries);
                 Owned<IException> exc = e;
                 if (!reopen())
+                {
+                    ioRetries.fetch_add(tries);
                     throw exc.getClear();
-
+                }
             }
         }
+
+         if (tries)
+            ioRetries.fetch_add(tries);
+
         ioWriteCycles.fetch_add(timer.elapsedCycles());
         ioWriteBytes.fetch_add(ret);
         ++ioWrites;

+ 1 - 0
system/jlib/jstatcodes.h

@@ -183,6 +183,7 @@ enum StatisticKind
     StNumScansPerRow,
     StNumAllocations,
     StNumAllocationScans,
+    StNumDiskRetries,
 
     StMax,
 

+ 5 - 4
system/jlib/jstats.cpp

@@ -616,6 +616,7 @@ static const StatisticMeta statsMetaData[StMax] = {
     { NUMSTAT(ScansPerRow) },
     { NUMSTAT(Allocations) },
     { NUMSTAT(AllocationScans) },
+    { NUMSTAT(DiskRetries) },
 };
 
 
@@ -929,10 +930,10 @@ void StatisticsMapping::createMappings()
 
 const StatisticsMapping allStatistics;
 const StatisticsMapping heapStatistics(StNumAllocations, StNumAllocationScans, StKindNone);
-const StatisticsMapping diskLocalStatistics(StCycleDiskReadIOCycles, StSizeDiskRead, StNumDiskReads, StCycleDiskWriteIOCycles, StSizeDiskWrite, StNumDiskWrites, StKindNone);
-const StatisticsMapping diskRemoteStatistics(StTimeDiskReadIO, StSizeDiskRead, StNumDiskReads, StTimeDiskWriteIO, StSizeDiskWrite, StNumDiskWrites, StKindNone);
-const StatisticsMapping diskReadRemoteStatistics(StTimeDiskReadIO, StSizeDiskRead, StNumDiskReads, StKindNone);
-const StatisticsMapping diskWriteRemoteStatistics(StTimeDiskWriteIO, StSizeDiskWrite, StNumDiskWrites, StKindNone);
+const StatisticsMapping diskLocalStatistics(StCycleDiskReadIOCycles, StSizeDiskRead, StNumDiskReads, StCycleDiskWriteIOCycles, StSizeDiskWrite, StNumDiskWrites, StNumDiskRetries, StKindNone);
+const StatisticsMapping diskRemoteStatistics(StTimeDiskReadIO, StSizeDiskRead, StNumDiskReads, StTimeDiskWriteIO, StSizeDiskWrite, StNumDiskWrites, StNumDiskRetries, StKindNone);
+const StatisticsMapping diskReadRemoteStatistics(StTimeDiskReadIO, StSizeDiskRead, StNumDiskReads, StNumDiskRetries, StKindNone);
+const StatisticsMapping diskWriteRemoteStatistics(StTimeDiskWriteIO, StSizeDiskWrite, StNumDiskWrites, StNumDiskRetries, StKindNone);
 
 //--------------------------------------------------------------------------------------------------------------------
 

+ 1 - 0
thorlcr/graph/thgraphmaster.cpp

@@ -2755,6 +2755,7 @@ static bool suppressStatisticIfZero(StatisticKind kind)
     case StNumSpills:
     case StSizeSpillFile:
     case StTimeSpillElapsed:
+    case StNumDiskRetries:
         return true;
     }
     return false;