Browse code

HPCC-12693 Improve the support for huge pages

This patch checks if transparent huge pages are supported, and if so
uses madvise to use huge pages for the allocated memory.  It also avoids
fragmentation problems in the transparent huge pages by ensuring that
memory returned to the system is always a multiple of the huge page size.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 10 years ago
parent
commit
936410356e
3 changed files with 122 additions and 16 deletions
  1. roxie/roxiemem/roxiemem.cpp — 76 additions, 15 deletions
  2. system/jlib/jdebug.cpp — 44 additions, 0 deletions
  3. system/jlib/jdebug.hpp — 2 additions, 1 deletion

+ 76 - 15
roxie/roxiemem/roxiemem.cpp

@@ -97,6 +97,8 @@ static unsigned heapLWM;
 static unsigned heapLargeBlocks;
 static unsigned heapLargeBlocks;
 static unsigned heapLargeBlockGranularity;
 static unsigned heapLargeBlockGranularity;
 static ILargeMemCallback * heapLargeBlockCallback;
 static ILargeMemCallback * heapLargeBlockCallback;
+static bool heapNotifyUnusedEachFree = true;
+static bool heapNotifyUnusedEachBlock = false;
 static unsigned __int64 lastStatsCycles;
 static unsigned __int64 lastStatsCycles;
 static unsigned __int64 statsCyclesInterval;
 static unsigned __int64 statsCyclesInterval;
 
 
@@ -107,6 +109,7 @@ static atomic_t dataBuffersActive;
 const unsigned UNSIGNED_BITS = sizeof(unsigned) * 8;
 const unsigned UNSIGNED_BITS = sizeof(unsigned) * 8;
 const unsigned UNSIGNED_ALLBITS = (unsigned) -1;
 const unsigned UNSIGNED_ALLBITS = (unsigned) -1;
 const unsigned TOPBITMASK = 1<<(UNSIGNED_BITS-1);
 const unsigned TOPBITMASK = 1<<(UNSIGNED_BITS-1);
+const memsize_t heapBlockSize = UNSIGNED_BITS*HEAP_ALIGNMENT_SIZE;
 
 
 template <typename VALUE_TYPE, typename ALIGN_TYPE>
 template <typename VALUE_TYPE, typename ALIGN_TYPE>
 inline VALUE_TYPE align_pow2(VALUE_TYPE value, ALIGN_TYPE alignment)
 inline VALUE_TYPE align_pow2(VALUE_TYPE value, ALIGN_TYPE alignment)
@@ -116,6 +119,18 @@ inline VALUE_TYPE align_pow2(VALUE_TYPE value, ALIGN_TYPE alignment)
 
 
 #define PAGES(x, alignment)    (((x) + ((alignment)-1)) / (alignment))           // hope the compiler converts to a shift
 #define PAGES(x, alignment)    (((x) + ((alignment)-1)) / (alignment))           // hope the compiler converts to a shift
 
 
+//Hint to the OS that the pages in [address, address+size) no longer need their contents
+//preserved; the range stays mapped and reusable.  Compiles to a no-op unless
+//NOTIFY_UNUSED_PAGES_ON_FREE is defined.
+inline void notifyMemoryUnused(void * address, memsize_t size)
+{
+#ifdef NOTIFY_UNUSED_PAGES_ON_FREE
+#ifdef _WIN32
+        //MEM_RESET tells Windows the data is no longer of interest; pages may be discarded
+        //rather than written to the paging file, but the region remains committed.
+        VirtualAlloc(address, size, MEM_RESET, PAGE_READWRITE);
+#else
+        // for linux mark as unwanted
+        madvise(address,size,MADV_DONTNEED);
+#endif
+#endif
+}
+
 //---------------------------------------------------------------------------------------------------------------------
 //---------------------------------------------------------------------------------------------------------------------
 
 
 typedef MapBetween<unsigned, unsigned, memsize_t, memsize_t> MapActivityToMemsize;
 typedef MapBetween<unsigned, unsigned, memsize_t, memsize_t> MapActivityToMemsize;
@@ -157,6 +172,8 @@ static void initializeHeap(bool allowHugePages, unsigned pages, unsigned largeBl
         if (heapBase != MAP_FAILED)
         if (heapBase != MAP_FAILED)
         {
         {
             heapUseHugePages = true;
             heapUseHugePages = true;
+            //MORE: At the moment I'm not sure calling madvise() has any benefit, but needs testing before releasing
+            //heapNotifyUnusedPagesOnFree = false;
             DBGLOG("Using Huge Pages for roxiemem");
             DBGLOG("Using Huge Pages for roxiemem");
         }
         }
         else
         else
@@ -172,8 +189,13 @@ static void initializeHeap(bool allowHugePages, unsigned pages, unsigned largeBl
 
 
     if (!heapBase)
     if (!heapBase)
     {
     {
+        const memsize_t hugePageSize = getHugePageSize();
+        memsize_t heapAlignment = allowHugePages ? hugePageSize : HEAP_ALIGNMENT_SIZE;
+        if (heapAlignment < HEAP_ALIGNMENT_SIZE)
+            heapAlignment = HEAP_ALIGNMENT_SIZE;
+
         int ret;
         int ret;
-        if ((ret = posix_memalign((void **) &heapBase, HEAP_ALIGNMENT_SIZE, memsize)) != 0) {
+        if ((ret = posix_memalign((void **) &heapBase, heapAlignment, memsize)) != 0) {
 
 
         	switch (ret)
         	switch (ret)
         	{
         	{
@@ -197,6 +219,29 @@ static void initializeHeap(bool allowHugePages, unsigned pages, unsigned largeBl
         	}
         	}
             HEAPERROR("RoxieMemMgr: Unable to create heap");
             HEAPERROR("RoxieMemMgr: Unable to create heap");
         }
         }
+
+        //If we are allowed to use huge pages, then mark huge pages as beneficial
+        if (allowHugePages)
+        {
+            if (areTransparentHugePagesEnabled())
+            {
+                if (madvise(heapBase,memsize,MADV_HUGEPAGE) == 0)
+                {
+                    //Prevent the transparent huge page code from working hard trying to defragment memory when single heaplets are released
+                    heapNotifyUnusedEachFree = false;
+                    if ((heapBlockSize % hugePageSize) == 0)
+                    {
+                        //If we notify heapBlockSize items at a time it will always be a multiple of hugePageSize so shouldn't trigger defragmentation
+                        heapNotifyUnusedEachBlock = true;
+                        DBGLOG("Heap advised as worth using huge pages - memory released in blocks");
+                    }
+                    else
+                        DBGLOG("Heap advised as worth using huge pages - MEMORY WILL NOT BE RELEASED");
+                }
+            }
+            else
+                DBGLOG("Huge pages requested, but transparent huge pages currently disabled");
+        }
     }
     }
 #endif
 #endif
 
 
@@ -553,18 +598,15 @@ static void subfree_aligned(void *ptr, unsigned pages = 1)
         DBGLOG("RoxieMemMgr: Incorrect alignment of freed area (ptr=%p)", ptr);
         DBGLOG("RoxieMemMgr: Incorrect alignment of freed area (ptr=%p)", ptr);
         HEAPERROR("RoxieMemMgr: Incorrect alignment of freed area");
         HEAPERROR("RoxieMemMgr: Incorrect alignment of freed area");
     }
     }
-#ifdef NOTIFY_UNUSED_PAGES_ON_FREE
-#ifdef _WIN32
-    VirtualAlloc(ptr, pages*HEAP_ALIGNMENT_SIZE, MEM_RESET, PAGE_READWRITE);
-#else
-    // for linux mark as unwanted
-    madvise(ptr,pages*HEAP_ALIGNMENT_SIZE,MADV_DONTNEED);
-#endif
-#endif
+    if (heapNotifyUnusedEachFree)
+        notifyMemoryUnused(ptr, pages*HEAP_ALIGNMENT_SIZE);
+
     unsigned wordOffset = (unsigned) (pageOffset / UNSIGNED_BITS);
     unsigned wordOffset = (unsigned) (pageOffset / UNSIGNED_BITS);
     unsigned bitOffset = (unsigned) (pageOffset % UNSIGNED_BITS);
     unsigned bitOffset = (unsigned) (pageOffset % UNSIGNED_BITS);
     unsigned mask = 1<<bitOffset;
     unsigned mask = 1<<bitOffset;
     unsigned nextPageOffset = (pageOffset+pages + (UNSIGNED_BITS-1)) / UNSIGNED_BITS;
     unsigned nextPageOffset = (pageOffset+pages + (UNSIGNED_BITS-1)) / UNSIGNED_BITS;
+    char * firstReleaseBlock = NULL;
+    char * lastReleaseBlock = NULL;
     {
     {
         CriticalBlock b(heapBitCrit);
         CriticalBlock b(heapBitCrit);
         heapAllocated -= pages;
         heapAllocated -= pages;
@@ -590,7 +632,17 @@ static void subfree_aligned(void *ptr, unsigned pages = 1)
         {
         {
             unsigned prev = heapBitmap[wordOffset];
             unsigned prev = heapBitmap[wordOffset];
             if ((prev & mask) == 0)
             if ((prev & mask) == 0)
-                heapBitmap[wordOffset] = (prev|mask);
+            {
+                unsigned next = prev | mask;
+                heapBitmap[wordOffset] = next;
+                if ((next == UNSIGNED_ALLBITS) && heapNotifyUnusedEachBlock)
+                {
+                    char * address = heapBase + wordOffset * heapBlockSize;
+                    if (!firstReleaseBlock)
+                        firstReleaseBlock = address;
+                    lastReleaseBlock = address;
+                }
+            }
             else
             else
                 HEAPERROR("RoxieMemMgr: Page freed twice");
                 HEAPERROR("RoxieMemMgr: Page freed twice");
             if (!--pages)
             if (!--pages)
@@ -604,6 +656,10 @@ static void subfree_aligned(void *ptr, unsigned pages = 1)
                 mask <<= 1;
                 mask <<= 1;
         }
         }
     }
     }
+
+    if (firstReleaseBlock)
+        notifyMemoryUnused(firstReleaseBlock, (lastReleaseBlock - firstReleaseBlock) + heapBlockSize);
+
     if (memTraceLevel >= 2)
     if (memTraceLevel >= 2)
         DBGLOG("RoxieMemMgr: subfree_aligned() %u pages ok - addr=%p heapLWM=%u totalPages=%u", _pages, ptr, heapLWM, heapTotalPages);
         DBGLOG("RoxieMemMgr: subfree_aligned() %u pages ok - addr=%p heapLWM=%u totalPages=%u", _pages, ptr, heapLWM, heapTotalPages);
 }
 }
@@ -4529,6 +4585,8 @@ protected:
             _heapLWM = heapLWM;
             _heapLWM = heapLWM;
             _heapAllocated = heapAllocated;
             _heapAllocated = heapAllocated;
             _heapUseHugePages = heapUseHugePages;
             _heapUseHugePages = heapUseHugePages;
+            _heapNotifyUnusedEachFree = heapNotifyUnusedEachFree;
+            _heapNotifyUnusedEachBlock = heapNotifyUnusedEachBlock;
         }
         }
         ~HeapPreserver()
         ~HeapPreserver()
         {
         {
@@ -4540,6 +4598,8 @@ protected:
             heapLWM = _heapLWM;
             heapLWM = _heapLWM;
             heapAllocated = _heapAllocated;
             heapAllocated = _heapAllocated;
             heapUseHugePages = _heapUseHugePages;
             heapUseHugePages = _heapUseHugePages;
+            heapNotifyUnusedEachFree = _heapNotifyUnusedEachFree;
+            heapNotifyUnusedEachBlock = _heapNotifyUnusedEachBlock;
         }
         }
         char *_heapBase;
         char *_heapBase;
         char *_heapEnd;
         char *_heapEnd;
@@ -4549,6 +4609,8 @@ protected:
         unsigned _heapLWM;
         unsigned _heapLWM;
         unsigned _heapAllocated;
         unsigned _heapAllocated;
         bool _heapUseHugePages;
         bool _heapUseHugePages;
+        bool _heapNotifyUnusedEachFree;
+        bool _heapNotifyUnusedEachBlock;
     };
     };
     void initBitmap(unsigned size)
     void initBitmap(unsigned size)
     {
     {
@@ -4743,6 +4805,8 @@ protected:
         HeapPreserver preserver;
         HeapPreserver preserver;
 
 
         initBitmap(maxBitmapSize);
         initBitmap(maxBitmapSize);
+        heapNotifyUnusedEachFree = false; // prevent calls to map out random chunks of memory!
+        heapNotifyUnusedEachBlock = false;
 
 
         Semaphore sem;
         Semaphore sem;
         BitmapAllocatorThread * threads[numBitmapThreads];
         BitmapAllocatorThread * threads[numBitmapThreads];
@@ -4773,12 +4837,9 @@ protected:
     }
     }
     void testBitmapThreading()
     void testBitmapThreading()
     {
     {
-#ifndef NOTIFY_UNUSED_PAGES_ON_FREE
-        //Don't run this with NOTIFY_UNUSED_PAGES_ON_FREE enabled - I'm not sure what the calls to map out random memory are likely to do!
         testBitmapThreading(1);
         testBitmapThreading(1);
         testBitmapThreading(3);
         testBitmapThreading(3);
         testBitmapThreading(11);
         testBitmapThreading(11);
-#endif
     }
     }
 
 
     void testHuge()
     void testHuge()
@@ -5670,7 +5731,7 @@ public:
 protected:
 protected:
     void testSetup()
     void testSetup()
     {
     {
-        setTotalMemoryLimit(false, memorySize, 0, NULL, NULL);
+        setTotalMemoryLimit(true, memorySize, 0, NULL, NULL);
     }
     }
 
 
     void testCleanup()
     void testCleanup()
@@ -5866,7 +5927,7 @@ public:
 protected:
 protected:
     void testSetup()
     void testSetup()
     {
     {
-        setTotalMemoryLimit(false, hugeMemorySize, 0, NULL, NULL);
+        setTotalMemoryLimit(true, hugeMemorySize, 0, NULL, NULL);
     }
     }
 
 
     void testCleanup()
     void testCleanup()

+ 44 - 0
system/jlib/jdebug.cpp

@@ -2778,6 +2778,50 @@ void PrintMemoryReport(bool full)
 #endif
 #endif
 
 
 
 
+//Return true if the kernel currently allows transparent huge pages (THP).
+//The active THP mode is shown in brackets in the sysfs file, so the string
+//"[never]" being present means THP is disabled; "[always]" or "[madvise]" means
+//madvise(MADV_HUGEPAGE) can be beneficial.  Returns false on non-Linux
+//platforms, or if the sysfs file cannot be read (e.g. old kernel, no THP support).
+bool areTransparentHugePagesEnabled()
+{
+#ifdef __linux__
+    StringBuffer contents;
+    try
+    {
+        contents.loadFile("/sys/kernel/mm/transparent_hugepage/enabled");
+        return !strstr(contents.str(), "[never]");
+    }
+    catch (IException * e)
+    {
+        //File missing or unreadable => assume THP unavailable
+        e->Release();
+    }
+#endif
+    return false;
+}
+
+//Return the system huge page size in bytes, parsed from /proc/meminfo on Linux.
+//Falls back to 2MB (the common x86-64 huge page size) on other platforms, if
+///proc/meminfo cannot be read, or if the entry is not in the expected format.
+memsize_t getHugePageSize()
+{
+#ifdef __linux__
+    StringBuffer contents;
+    try
+    {
+        //Search for an entry   Hugepagesize:      xxxx kB
+        const char * const tag = "Hugepagesize:";
+        contents.loadFile("/proc/meminfo");
+        const char * hugepage = strstr(contents.str(), tag);
+        if (hugepage)
+        {
+            const char * next = hugepage + strlen(tag);
+            char * end;
+            //strtoul skips the leading whitespace before the number
+            memsize_t size = strtoul(next, &end, 10);
+            //Only trust the value if the units are the expected " kB"
+            if (strncmp(end, " kB", 3) == 0)
+                return size * 0x400;
+        }
+    }
+    catch (IException * e)
+    {
+        //Fall through to the default if /proc/meminfo is unavailable
+        e->Release();
+    }
+#endif
+    return 0x200000; // Default for an x86 system
+}
+
 //===========================================================================
 //===========================================================================
 
 
 #ifdef LEAK_CHECK
 #ifdef LEAK_CHECK

+ 2 - 1
system/jlib/jdebug.hpp

@@ -307,7 +307,8 @@ extern jlib_decl unsigned getAffinityCpus();
 extern jlib_decl void printProcMap(const char *fn, bool printbody, bool printsummary, StringBuffer *lnout, MemoryBuffer *mb, bool useprintf);
 extern jlib_decl void printProcMap(const char *fn, bool printbody, bool printsummary, StringBuffer *lnout, MemoryBuffer *mb, bool useprintf);
 extern jlib_decl void PrintMemoryReport(bool full=true);
 extern jlib_decl void PrintMemoryReport(bool full=true);
 extern jlib_decl void printAllocationSummary();
 extern jlib_decl void printAllocationSummary();
-
+extern jlib_decl bool areTransparentHugePagesEnabled();
+extern jlib_decl memsize_t getHugePageSize();
 
 
 #endif
 #endif