Browse Source

HPCC-12723 Optimize memory manager using the new functions

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 10 years ago
parent
commit
4023d41eb6
2 changed files with 48 additions and 48 deletions
  1. 44 45
      roxie/roxiemem/roxiemem.cpp
  2. 4 3
      testing/unittests/jlibtests.cpp

+ 44 - 45
roxie/roxiemem/roxiemem.cpp

@@ -18,6 +18,7 @@
 #include "roxiemem.hpp"
 #include "roxierowbuff.hpp"
 #include "jlog.hpp"
+#include "jset.hpp"
 #include <new>
 
 #ifndef _WIN32
@@ -516,13 +517,10 @@ static void *suballoc_aligned(size32_t pages, bool returnNullWhenExhausted)
             unsigned hbi = heapBitmap[i];
             if (hbi)
             {
-                unsigned mask = 1;
-                char *ret = heapBase + i*UNSIGNED_BITS*HEAP_ALIGNMENT_SIZE;
-                while (!(hbi & mask))
-                {
-                    ret += HEAP_ALIGNMENT_SIZE;
-                    mask <<= 1;
-                }
+                const unsigned pos = countTrailingUnsetBits(hbi);
+                const unsigned mask = 1U << pos;
+                const unsigned match = i*UNSIGNED_BITS + pos;
+                char *ret = heapBase + match*HEAP_ALIGNMENT_SIZE;
                 heapBitmap[i] = (hbi & ~mask);
                 heapLWM = i;
                 heapAllocated++;
@@ -638,32 +636,30 @@ static void subfree_aligned(void *ptr, unsigned pages = 1)
 
         if (wordOffset < heapLWM)
             heapLWM = wordOffset;
+
         loop
         {
             unsigned prev = heapBitmap[wordOffset];
-            if ((prev & mask) == 0)
+            if ((prev & mask) != 0)
+                HEAPERROR("RoxieMemMgr: Page freed twice");
+
+            unsigned next = prev | mask;
+            heapBitmap[wordOffset] = next;
+            if ((next == UNSIGNED_ALLBITS) && heapNotifyUnusedEachBlock)
             {
-                unsigned next = prev | mask;
-                heapBitmap[wordOffset] = next;
-                if ((next == UNSIGNED_ALLBITS) && heapNotifyUnusedEachBlock)
-                {
-                    char * address = heapBase + wordOffset * heapBlockSize;
-                    if (!firstReleaseBlock)
-                        firstReleaseBlock = address;
-                    lastReleaseBlock = address;
-                }
+                char * address = heapBase + wordOffset * heapBlockSize;
+                if (!firstReleaseBlock)
+                    firstReleaseBlock = address;
+                lastReleaseBlock = address;
             }
-            else
-                HEAPERROR("RoxieMemMgr: Page freed twice");
             if (!--pages)
                 break;
-            if (mask==TOPBITMASK)
+            mask <<= 1;
+            if (mask==0)
             {
                 mask = 1;
                 wordOffset++;
             }
-            else
-                mask <<= 1;
         }
     }
 
@@ -688,21 +684,19 @@ static void clearBits(unsigned start, unsigned len)
         unsigned heapword = heapBitmap[wordOffset];
         while (len--)
         {
-            if (heapword & mask)
-                heapword &= ~mask;
-            else
+            if ((heapword & mask) == 0)
                 HEAPERROR("RoxieMemMgr: Page freed twice");
-            if (mask==TOPBITMASK)
+            heapword &= ~mask;
+            mask <<= 1;
+            if (mask==0)
             {
                 heapBitmap[wordOffset] = heapword;
-                mask = 1;
                 wordOffset++;
                 if (wordOffset==heapBitmapSize)
                     return;    // Avoid read off end of array
                 heapword = heapBitmap[wordOffset];
+                mask = 1;
             }
-            else
-                mask <<= 1;
         }
         heapBitmap[wordOffset] = heapword;
     }
@@ -4775,21 +4769,21 @@ protected:
     }
 
 #ifdef __64BIT__
-    enum { numBitmapThreads = 20, maxBitmapSize = (unsigned)(I64C(0xFFFFFFFFFF) / HEAP_ALIGNMENT_SIZE / UNSIGNED_BITS) };      // Test larger range - in case we ever reduce the granularity
+    enum { maxBitmapThreads = 20, maxBitmapSize = (unsigned)(I64C(0xFFFFFFFFFF) / HEAP_ALIGNMENT_SIZE / UNSIGNED_BITS) };      // Test larger range - in case we ever reduce the granularity
 #else
     // Restrict heap sizes on 32-bit systems
-    enum { numBitmapThreads = 20, maxBitmapSize = (unsigned)(I64C(0xFFFFFFFF) / HEAP_ALIGNMENT_SIZE / UNSIGNED_BITS) };      // 4Gb
+    enum { maxBitmapThreads = 20, maxBitmapSize = (unsigned)(I64C(0xFFFFFFFF) / HEAP_ALIGNMENT_SIZE / UNSIGNED_BITS) };      // 4Gb
 #endif
     class BitmapAllocatorThread : public Thread
     {
     public:
-        BitmapAllocatorThread(Semaphore & _sem, unsigned _size) : Thread("AllocatorThread"), sem(_sem), size(_size)
+        BitmapAllocatorThread(Semaphore & _sem, unsigned _size, unsigned _numThreads) : Thread("AllocatorThread"), sem(_sem), size(_size), numThreads(_numThreads)
         {
         }
 
         int run()
         {
-            unsigned numBitmapIter = (maxBitmapSize * 32 / size) / numBitmapThreads;
+            unsigned numBitmapIter = (maxBitmapSize * 32 / size) / numThreads;
             sem.wait();
             memsize_t total = 0;
             for (unsigned i=0; i < numBitmapIter; i++)
@@ -4808,9 +4802,10 @@ protected:
     protected:
         Semaphore & sem;
         const unsigned size;
+        const unsigned numThreads;
         volatile memsize_t final;
     };
-    void testBitmapThreading(unsigned size)
+    void testBitmapThreading(unsigned size, unsigned numThreads)
     {
         HeapPreserver preserver;
 
@@ -4819,19 +4814,19 @@ protected:
         heapNotifyUnusedEachBlock = false;
 
         Semaphore sem;
-        BitmapAllocatorThread * threads[numBitmapThreads];
-        for (unsigned i1 = 0; i1 < numBitmapThreads; i1++)
-            threads[i1] = new BitmapAllocatorThread(sem, size);
-        for (unsigned i2 = 0; i2 < numBitmapThreads; i2++)
+        BitmapAllocatorThread * threads[maxBitmapThreads];
+        for (unsigned i1 = 0; i1 < numThreads; i1++)
+            threads[i1] = new BitmapAllocatorThread(sem, size, numThreads);
+        for (unsigned i2 = 0; i2 < numThreads; i2++)
             threads[i2]->start();
 
         unsigned startTime = msTick();
-        sem.signal(numBitmapThreads);
-        for (unsigned i3 = 0; i3 < numBitmapThreads; i3++)
+        sem.signal(numThreads);
+        for (unsigned i3 = 0; i3 < numThreads; i3++)
             threads[i3]->join();
         unsigned endTime = msTick();
 
-        for (unsigned i4 = 0; i4 < numBitmapThreads; i4++)
+        for (unsigned i4 = 0; i4 < numThreads; i4++)
             threads[i4]->Release();
         DBGLOG("Time taken for bitmap threading(%d) = %d", size, endTime-startTime);
 
@@ -4840,16 +4835,20 @@ protected:
         unsigned maxBlock;
         memstats(totalPages, freePages, maxBlock);
         ASSERT(totalPages == maxBitmapSize * 32);
-        unsigned numAllocated = ((maxBitmapSize * 32 / size) / numBitmapThreads) * numBitmapThreads * size;
+        unsigned numAllocated = ((maxBitmapSize * 32 / size) / numThreads) * numThreads * size;
         ASSERT(freePages == maxBitmapSize * 32 - numAllocated);
 
         delete[] heapBitmap;
     }
     void testBitmapThreading()
     {
-        testBitmapThreading(1);
-        testBitmapThreading(3);
-        testBitmapThreading(11);
+        //Don't run this with NOTIFY_UNUSED_PAGES_ON_FREE enabled - I'm not sure what the calls to map out random memory are likely to do!
+        testBitmapThreading(1, 1);
+        testBitmapThreading(3, 1);
+        testBitmapThreading(11, 1);
+        testBitmapThreading(1, maxBitmapThreads);
+        testBitmapThreading(3, maxBitmapThreads);
+        testBitmapThreading(11, maxBitmapThreads);
     }
 
     void testHuge()

+ 4 - 3
testing/unittests/jlibtests.cpp

@@ -166,7 +166,8 @@ protected:
         bool setValue = !initial;
         bool clearValue = initial;
         const unsigned numBits = 400;
-        for (unsigned pass=0; pass < 10000; pass++)
+        const unsigned passes = 10000;
+        for (unsigned pass=0; pass < passes; pass++)
         {
             Owned<IBitSet> bs = createThreadSafeBitSet();
             testSet1(initial, bs, 0, numBits, setValue, clearValue);
@@ -174,7 +175,7 @@ protected:
         unsigned elapsed = msTick()-now;
         fprintf(stdout, "Bit test (%u) time taken = %dms\n", initial, elapsed);
         now = msTick();
-        for (unsigned pass=0; pass < 10000; pass++)
+        for (unsigned pass=0; pass < passes; pass++)
         {
             Owned<IBitSet> bs = createBitSet();
             testSet1(initial, bs, 0, numBits, setValue, clearValue);
@@ -185,7 +186,7 @@ protected:
         size32_t bitSetMemSz = getBitSetMemoryRequirement(numBits+5);
         MemoryBuffer mb;
         void *mem = mb.reserveTruncate(bitSetMemSz);
-        for (unsigned pass=0; pass < 10000; pass++)
+        for (unsigned pass=0; pass < passes; pass++)
         {
             Owned<IBitSet> bs = createBitSet(bitSetMemSz, mem);
             testSet1(initial, bs, 0, numBits, setValue, clearValue);