Bladeren bron

Merge pull request #9226 from ghalliday/issue16421

HPCC-16421 Improve register usage, and prefetch the next potential address

Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 8 jaren geleden
bovenliggende
commit
75f75c1896
2 gewijzigde bestanden met toevoegingen van 7 en 3 verwijderingen
  1. roxie/roxiemem/roxiemem.cpp (+4 -2)
  2. system/include/platform.h (+3 -1)

+ 4 - 2
roxie/roxiemem/roxiemem.cpp

@@ -3080,18 +3080,20 @@ char * ChunkedHeaplet::allocateSingle(unsigned allocated, bool incCounter)
 
         {
             //Scan through all the memory, checking for a block marked as free - should terminate very quickly unless highly fragmented
-            size32_t offset = nextMatchOffset;
+            const size_t startOffset = nextMatchOffset;
+            size32_t offset = startOffset;
             loop
             {
                 ret = data() + offset;
                 offset += size;
                 if (offset == curFreeBase)
                     offset = 0;
+                __builtin_prefetch(data() + offset);
 
                 if (((std::atomic_uint *)ret)->load(std::memory_order_relaxed) == FREE_ROW_COUNT)
                     break;
 
-                if (offset == nextMatchOffset)
+                if (offset == startOffset)
                 {
                     //Should never occur...
                     return nullptr;

+ 3 - 1
system/include/platform.h

@@ -179,7 +179,9 @@ typedef memsize_t rowsize_t;
 #define GetSharedProcedure(h,name) GetProcAddress(h,(char *)name)
 #define LoadSucceeded(h)           ((unsigned)h >= 32)
 #define GetSharedObjectError()     GetLastError()
-#define strtok_r(a,b,c)             j_strtok_r(a,b,c)
+#define strtok_r(a,b,c)            j_strtok_r(a,b,c)
+#define __builtin_prefetch(addr)   _mm_prefetch((const char *)(addr), _MM_HINT_T0)
+
 #define __thread __declspec(thread)
 
 typedef unsigned __int64 off64_t;