Преглед изворни кода

HPCC-26383 Add utility classes for tracking blocked time

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday пре 3 година
родитељ
комит
e82b4c895c
3 измењених фајлова са 268 додато и 1 уклоњено
  1. 50 0
      system/jlib/jdebug.cpp
  2. 85 0
      system/jlib/jdebug.hpp
  3. 133 1
      testing/unittests/jlibtests.cpp

+ 50 - 0
system/jlib/jdebug.cpp

@@ -645,6 +645,56 @@ MODULE_EXIT()
 
 //===========================================================================
 
+
+void BlockedTimeTracker::noteWaiting()
+{
+    CriticalBlock block(cs);
+    numWaiting++;
+    timeStampTally -= get_cycles_now();
+}
+
+void BlockedTimeTracker::noteComplete()
+{
+    CriticalBlock block(cs);
+    numWaiting--;
+    timeStampTally += get_cycles_now();
+}
+
+__uint64 BlockedTimeTracker::getWaitingNs() const
+{
+    unsigned active;
+    cycle_t tally;
+    {
+        CriticalBlock block(cs);
+        active = numWaiting;
+        tally = timeStampTally;
+    }
+
+    if (active != 0)
+    {
+        cycle_t now = get_cycles_now();
+        tally += active * now;
+    }
+
+    return cycle_to_nanosec(tally);
+}
+
+
+__uint64 LightweightBlockedTimeTracker::getWaitingNs() const
+{
+    __uint64 tally = timeStampTally.load(std::memory_order_acquire); // read the value once atomically
+    unsigned active = tally % MAX_ACTIVE;
+    if (active != 0)
+    {
+        cycle_t now = get_cycles_now();
+        tally += active * now * MAX_ACTIVE;
+    }
+
+    return cycle_to_nanosec(tally / MAX_ACTIVE);
+}
+
+//===========================================================================
+
 #ifdef _WIN32
 
 typedef enum _PROCESSINFOCLASS {

+ 85 - 0
system/jlib/jdebug.hpp

@@ -22,6 +22,7 @@
 
 #include "jiface.hpp"
 #include "jstats.h"
+#include <atomic>
 
 #define TIMING
 
@@ -225,6 +226,90 @@ private:
 };
 
 
+//---------------------------------------------------------------------------------------------------------------------
+
+/*
+There are several situations where we want to record the time spent waiting for items to be processed - time an item
+is queued before processing, time spent calling LDAP from within esp etc.. Gathering the metric has a few complications:
+* The metric gathering needs to have minimal impact
+* The waiting time needs to include items that are currently waiting as well completed
+* Multiple items can be being waited for at the same time, and complete at different times.
+
+    waitingTime = sum(itemTime)
+    = sum(completedItemTime) + sum(inflightItemTime)
+    = sum(endCompletedTime-startCompletedTime) + sum(currentTime-startInflightTime))
+    = sumEndCompletedTime - sumStartCompletedTime + sum(currentTime) - sumStartInflightTime
+    = sumEndCompletedTime + num*current - sumStartTime
+
+The following pattern efficiently solves the problem:
+
+ * When an item is queued/started atomically increment a counter, and add the current timestamp to the accumulated start time stamps.
+ * When an items is dequeued/complete atomically decrement a counter and add the current timestamp to the accumulated end time stamps.
+ * To calculate the waiting time use (sumEndTimestamps + numWaiting * currentTimeStamp) - sumStartTimestamps.
+
+
+At first glance this appears to have a problem because sumEndTimestamps will quickly overflow, but because of the
+properties of modulo arithmetic, the final result will be correct even if it has overflowed!
+Also since you are only ever interested in (sumEndTimestamps - sumStartTimestamps) the same field can be used for both.
+
+There are two versions, one that uses a critical section, and a second that uses atomics, but is limited to the number
+of active blocked items.
+*/
+
+class jlib_decl BlockedTimeTracker
+{
+public:
+    BlockedTimeTracker() = default;
+    BlockedTimeTracker(const BlockedTimeTracker &) = delete;
+
+    void noteWaiting();
+    void noteComplete();
+    __uint64 getWaitingNs() const;
+
+private:
+    mutable CriticalSection cs;
+    unsigned numWaiting = 0;
+    cycle_t timeStampTally = 0;
+};
+
+class jlib_decl BlockedSection
+{
+public:
+    BlockedSection(BlockedTimeTracker & _tracker) : tracker(_tracker) { tracker.noteWaiting(); }
+    ~BlockedSection() { tracker.noteComplete(); }
+private:
+    BlockedTimeTracker & tracker;
+};
+
+//Lightweight version that uses a single atomic, but has a limit on the number of active blocked items
+//Easier to understand by looking at the code for the BlockedTimeTracker class
+//Could be a template class (for MAX_ACTIVE), but I doubt it is worth it.
+class jlib_decl LightweightBlockedTimeTracker
+{
+    //MAX_ACTIVE should be a power of 2 for efficiency, 256 gives a max blocked time of about half a year before wrapping.
+    static constexpr unsigned MAX_ACTIVE = 256;
+public:
+    LightweightBlockedTimeTracker() = default;
+    LightweightBlockedTimeTracker(const LightweightBlockedTimeTracker &) = delete;
+
+    void noteWaiting()                  { timeStampTally.fetch_sub((get_cycles_now() * MAX_ACTIVE) - 1); } // i.e. add 1 and subtract the time
+    void noteComplete()                 { timeStampTally.fetch_add((get_cycles_now() * MAX_ACTIVE) - 1); }
+    __uint64 getWaitingNs() const;
+
+private:
+    std::atomic<__uint64> timeStampTally{0};            // timestamp * MAX_ACTIVE + active
+};
+
+class jlib_decl LightweightBlockedSection
+{
+public:
+    LightweightBlockedSection(LightweightBlockedTimeTracker & _tracker) : tracker(_tracker) { tracker.noteWaiting(); }
+    ~LightweightBlockedSection() { tracker.noteComplete(); }
+private:
+    LightweightBlockedTimeTracker & tracker;
+};
+
+
 //===========================================================================
 #ifndef USING_MPATROL
 #ifdef _DEBUG

+ 133 - 1
testing/unittests/jlibtests.cpp

@@ -2592,7 +2592,7 @@ class HashTableTests : public CppUnit::TestFixture
             hv = hashc((const byte *)&i,sizeof(i), hv);
             inputHvSum += hv;
             cache.add(i, hv);
-            unsigned lookupHv;
+            unsigned lookupHv = 0;
             CPPUNIT_ASSERT(cache.get(i, lookupHv));
             lookupHvSum += lookupHv;
         }
@@ -2610,5 +2610,137 @@ class HashTableTests : public CppUnit::TestFixture
 CPPUNIT_TEST_SUITE_REGISTRATION( HashTableTests );
 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( HashTableTests, "HashTableTests" );
 
+class BlockedTimingTests : public CppUnit::TestFixture
+{
+    static constexpr bool trace = false;
+
+    CPPUNIT_TEST_SUITE( BlockedTimingTests );
+        CPPUNIT_TEST(testStandard);
+        CPPUNIT_TEST(testStandard2);
+        CPPUNIT_TEST(testStandard3);
+        CPPUNIT_TEST(testLightweight);
+        CPPUNIT_TEST(testLightweight2);
+    CPPUNIT_TEST_SUITE_END();
+
+    void testStandard()
+    {
+        BlockedTimeTracker tracker;
+
+        __uint64 blockTime;
+        {
+            BlockedSection block(tracker);
+            MilliSleep(15);
+            blockTime = tracker.getWaitingNs();
+        }
+        __uint64 postBlockTime = tracker.getWaitingNs();
+        __uint64 expected = 15000000;
+        CPPUNIT_ASSERT(blockTime >= expected);
+        CPPUNIT_ASSERT(blockTime <= expected + 2000000);
+        CPPUNIT_ASSERT(postBlockTime - blockTime <= 1000000);
+        if (trace)
+            printf("%" I64F "u %" I64F "u\n", blockTime-50000000, postBlockTime-blockTime);
+    }
+
+    void testStandard2()
+    {
+        BlockedTimeTracker tracker;
+
+        __uint64 blockTime;
+        {
+            BlockedSection block3(tracker);
+            MilliSleep(10);
+            {
+                BlockedSection block2(tracker);
+                MilliSleep(20);
+                {
+                    BlockedSection block2(tracker);
+                    MilliSleep(3);
+                    blockTime = tracker.getWaitingNs();
+                }
+            }
+        }
+        __uint64 postBlockTime = tracker.getWaitingNs();
+        __uint64 expected = 10000000 + 2 * 20000000 + 3 * 3000000;
+        CPPUNIT_ASSERT(blockTime >= expected);
+        CPPUNIT_ASSERT(blockTime <= expected + 2000000);
+        CPPUNIT_ASSERT(postBlockTime - blockTime <= 1000000);
+        if (trace)
+            printf("%" I64F "u %" I64F "u\n", blockTime-expected, postBlockTime-blockTime);
+    }
+
+    void testStandard3()
+    {
+        BlockedTimeTracker tracker;
+
+        __uint64 blockTime;
+        {
+            auto action = COnScopeExit([&](){ tracker.noteComplete(); });
+            auto action2(COnScopeExit([&](){ tracker.noteComplete(); }));
+            tracker.noteWaiting();
+            tracker.noteWaiting();
+
+            MilliSleep(15);
+            blockTime = tracker.getWaitingNs();
+        }
+        __uint64 postBlockTime = tracker.getWaitingNs();
+        __uint64 expected = 15000000 * 2;
+        CPPUNIT_ASSERT(blockTime >= expected);
+        CPPUNIT_ASSERT(blockTime <= expected + 2000000);
+        CPPUNIT_ASSERT(postBlockTime - blockTime <= 1000000);
+        if (trace)
+            printf("%" I64F "u %" I64F "u\n", blockTime-50000000, postBlockTime-blockTime);
+    }
+
+    void testLightweight()
+    {
+        LightweightBlockedTimeTracker tracker;
+
+        __uint64 blockTime;
+        {
+            LightweightBlockedSection block(tracker);
+            MilliSleep(50);
+            blockTime = tracker.getWaitingNs();
+        }
+        __uint64 postBlockTime = tracker.getWaitingNs();
+        __uint64 expected = 50000000;
+        CPPUNIT_ASSERT(blockTime >= expected);
+        CPPUNIT_ASSERT(blockTime <= expected + 2000000);
+        CPPUNIT_ASSERT(postBlockTime - blockTime <= 1000000);
+        if (trace)
+            printf("%" I64F "u %" I64F "u\n", blockTime-50000000, postBlockTime-blockTime);
+    }
+
+    void testLightweight2()
+    {
+        LightweightBlockedTimeTracker tracker;
+
+        __uint64 blockTime;
+        {
+            LightweightBlockedSection block3(tracker);
+            MilliSleep(10);
+            {
+                LightweightBlockedSection block2(tracker);
+                MilliSleep(20);
+                {
+                    LightweightBlockedSection block2(tracker);
+                    MilliSleep(3);
+                    blockTime = tracker.getWaitingNs();
+                }
+            }
+        }
+        __uint64 postBlockTime = tracker.getWaitingNs();
+        __uint64 expected = 10000000 + 2 * 20000000 + 3 * 3000000;
+        CPPUNIT_ASSERT(blockTime >= expected);
+        CPPUNIT_ASSERT(blockTime <= expected + 2000000);
+        CPPUNIT_ASSERT(postBlockTime - blockTime <= 1000000);
+        if (trace)
+            printf("%" I64F "u %" I64F "u\n", blockTime-expected, postBlockTime-blockTime);
+    }
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION( BlockedTimingTests );
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( BlockedTimingTests, "BlockedTimingTests" );
+
+
 
 #endif // _USE_CPPUNIT