浏览代码

HPCC-26148 sigbus error on cache warming code

It's not clear why the error happened, though research suggests it may happen
if an NFS mount disconnects.

Recoded so that the warming is done in a separate process, and SIGBUS errors
are caught and reported.

Do not bother touching mmap for in-cache index pages, which will be loaded
anyway by subsequent code inside Roxie process.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 3 年之前
父节点
当前提交
f76843a4c8
共有 7 个文件被更改,包括 586 次插入216 次删除
  1. 3 0
      roxie/CMakeLists.txt
  2. 4 4
      roxie/ccd/CMakeLists.txt
  3. 99 212
      roxie/ccd/ccdfile.cpp
  4. 1 0
      roxie/ccd/ccdfile.hpp
  5. 73 0
      roxie/ccdcache/CMakeLists.txt
  6. 323 0
      roxie/ccdcache/ccdcache.cpp
  7. 83 0
      roxie/ccdcache/ccdcache.hpp

+ 3 - 0
roxie/CMakeLists.txt

@@ -14,6 +14,9 @@
 #    limitations under the License.
 ################################################################################
 HPCC_ADD_SUBDIRECTORY (ccd)
+IF (NOT WIN32)
+  HPCC_ADD_SUBDIRECTORY (ccdcache "PLATFORM")
+ENDIF()
 HPCC_ADD_SUBDIRECTORY (roxie "PLATFORM")
 HPCC_ADD_SUBDIRECTORY (topo "PLATFORM")
 HPCC_ADD_SUBDIRECTORY (roxiemem)

+ 4 - 4
roxie/ccd/CMakeLists.txt

@@ -27,7 +27,8 @@
 project( ccd ) 
 
 set (   SRCS 
-        ccdactivities.cpp 
+        ccdactivities.cpp
+        ../ccdcache/ccdcache.cpp
         ccddali.cpp
         ccdcontext.cpp
         ccddebug.cpp
@@ -68,6 +69,7 @@ include_directories (
          ${HPCC_SOURCE_DIR}/system/mp
          ${HPCC_SOURCE_DIR}/common/workunit
          ${HPCC_SOURCE_DIR}/roxie/udplib
+         ${HPCC_SOURCE_DIR}/roxie/ccdcache
          ${HPCC_SOURCE_DIR}/roxie/roxie
          ${HPCC_SOURCE_DIR}/common/environment
          ${HPCC_SOURCE_DIR}/ecl/hthor
@@ -139,6 +141,4 @@ ENDIF()
 IF (USE_TBBMALLOC AND USE_TBBMALLOC_ROXIE)
    add_dependencies ( ccd tbb )
    target_link_libraries ( ccd libtbbmalloc_proxy libtbbmalloc)
-ENDIF()
-
-
+ENDIF()

+ 99 - 212
roxie/ccd/ccdfile.cpp

@@ -35,6 +35,7 @@
 #include "ccdsnmp.hpp"
 #include "rmtfile.hpp"
 #include "ccdqueue.ipp"
+#include "ccdcache.hpp"
 #if defined(__linux__) || defined(__APPLE__)
 #include <sys/mman.h>
 #endif
@@ -624,57 +625,6 @@ typedef StringArray *StringArrayPtr;
 
 // A circular buffer recording recent disk read operations that can be used to "prewarm" the cache
 
-struct CacheInfoEntry
-{
-    //For convenience the values for PageType match the NodeX enumeration (see noteWarm).
-    //Ensure disk entries sort last so that index nodes take precedence when deduping offsets.
-    enum PageType : unsigned
-    {
-        PageTypeBranch = 0,
-        PageTypeLeaf = 1,
-        PageTypeBlob = 2,
-        PageTypeDisk = 3,
-    };
-
-    union
-    {
-        struct
-        {
-#ifndef _WIN32
-            unsigned type: 2;    // disk or the kind of index node
-            __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
-            unsigned file: 24;   // Up to 4 million files
-#else
-//Windows does not like packing bitfields with different base types - fails the statck assert
-            __uint64 type: 2;    // disk or the kind of index node
-            __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
-            __uint64 file: 24;   // Up to 4 million files
-#endif
-        } b;
-        __uint64 u;
-    };
-
-#ifndef _WIN32
-    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
-#elif _MSC_VER >= 1900
-    //Older versions of the windows compiler complain CacheInfoEntry::b is not a type name
-    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
-#endif
-
-    inline CacheInfoEntry() { u = 0; }
-    inline CacheInfoEntry(unsigned _file, offset_t _pos, PageType pageType)
-    {
-        b.file = _file;
-        b.page = _pos >> pageBits;
-        b.type = pageType;
-    }
-    inline bool operator < ( const CacheInfoEntry &l) const { return u < l.u; }
-    inline bool operator <= ( const CacheInfoEntry &l) const { return u <= l.u; }
-    inline void operator++ () { b.page++; }
-
-    static constexpr unsigned pageBits = 13;  // 8k 'pages'
-};
-
 class CacheReportingBuffer : public CInterfaceOf<ICacheInfoRecorder>
 {
     // A circular buffer recording recent file activity. Note that noteRead() and clear() may be called from multiple threads
@@ -817,6 +767,75 @@ private:
     }
 };
 
+// Cache warmer used inside the Roxie process. Ranges flagged as index nodes are preloaded via
+// IKeyIndex::prewarmPage(); all other ranges are only recorded via IRoxieFileCache::noteRead().
+class IndexCacheWarmer : implements ICacheWarmer
+{
+    IRoxieFileCache *cache = nullptr;
+    Owned<ILazyFileIO> localFile;       // Current file, if lookupLocalFile() could resolve it
+    Owned<IKeyIndex> keyIndex;          // Created lazily on the first index node range of the current file
+    bool keyFailed = false;             // Set when createKeyIndex() returned null, so it is not retried per range
+    unsigned fileIdx = (unsigned) -1;   // Index of the current file, or -1 if it could not be resolved
+    unsigned filesProcessed = 0;
+    unsigned pagesPreloaded = 0;
+public:
+    IndexCacheWarmer(IRoxieFileCache *_cache) : cache(_cache) {}
+
+    // Begin processing a new file: resolve it and reset all per-file state.
+    virtual void startFile(const char *filename) override
+    {
+        // "filename" is the filename that roxie would use if it copied the file locally.  This may not
+        // match the name of the actual file - e.g. if the file is local but in a different location.
+        localFile.setown(cache->lookupLocalFile(filename));
+        // Always reset fileIdx - otherwise reads for a file that could not be resolved would be
+        // recorded against whichever file was processed previously.
+        fileIdx = (unsigned) -1;
+        if (localFile)
+        {
+            fileIdx = localFile->getFileIdx();
+        }
+        keyIndex.clear();
+        keyFailed = false;
+        filesProcessed++;
+    }
+
+    // Process one byte range [startOffset, endOffset) of the current file.
+    // nodeType is NodeNone for plain disk pages, otherwise the kind of index node to preload.
+    // Always returns true, so the caller carries on with the remaining ranges of the file.
+    virtual bool warmBlock(const char *filename, NodeType nodeType, offset_t startOffset, offset_t endOffset) override
+    {
+        if (nodeType != NodeNone && !keyFailed && localFile && !keyIndex)
+        {
+            //Pass false for isTLK - it will be initialised from the index header
+            keyIndex.setown(createKeyIndex(filename, localFile->getCrc(), *localFile.get(), fileIdx, false));
+            if (!keyIndex)
+                keyFailed = true;
+        }
+        if (nodeType != NodeNone && keyIndex)
+        {
+            // Round startOffset up to nearest multiple of index node size
+            unsigned nodeSize = keyIndex->getNodeSize();
+            startOffset = ((startOffset+nodeSize-1)/nodeSize)*nodeSize;
+            do
+            {
+                if (traceLevel > 8)
+                    DBGLOG("prewarming index page %u %s %" I64F "x-%" I64F "x", (int) nodeType, filename, startOffset, endOffset);
+                bool loaded = keyIndex->prewarmPage(startOffset, nodeType);
+                if (!loaded)
+                    break;
+                pagesPreloaded++;
+                startOffset += nodeSize;
+            }
+            while (startOffset < endOffset);
+        }
+        else if (fileIdx != (unsigned) -1)
+            cache->noteRead(fileIdx, startOffset, (endOffset-1) - startOffset);  // Ensure pages we prewarm are recorded in our cache tracker
+        return true;
+    }
+
+    // Finish the current file, releasing the file and index and forgetting its index number.
+    virtual void endFile() override
+    {
+        localFile.clear();
+        keyIndex.clear();
+        fileIdx = (unsigned) -1;
+    }
+
+    // Log a summary of the work done (only when tracing is enabled).
+    virtual void report() override
+    {
+        if (traceLevel)
+            DBGLOG("Processed %u files and preloaded %u index nodes", filesProcessed, pagesPreloaded);
+    }
+};
+
 class CRoxieFileCache : implements IRoxieFileCache, implements ICopyFileProgress, public CInterface
 {
     friend class CcdFileTest;
@@ -1772,7 +1791,7 @@ public:
         return ret.getLink();
     }
 
-    ILazyFileIO *lookupLocalFile(const char *filename)
+    virtual ILazyFileIO *lookupLocalFile(const char *filename)
     {
         try
         {
@@ -1823,6 +1842,8 @@ public:
 
     virtual void loadSavedOsCacheInfo() override
     {
+        if (!topology->getPropBool("@warmOsCache", true))
+            return;
         Owned<const ITopologyServer> topology = getTopology();
         for (unsigned channel : topology->queryChannels())
             doLoadSavedOsCacheInfo(channel);
@@ -1839,16 +1860,35 @@ public:
         if (!dllserver_root)
             return;
 #endif
+        unsigned cacheWarmTraceLevel = topology->getPropInt("@cacheWarmTraceLevel", traceLevel);
         VStringBuffer cacheFileName("%s/%s/cacheInfo.%d", dllserver_root, roxieName.str(), channel);
         StringBuffer cacheInfo;
         try
         {
             if (checkFileExists(cacheFileName))
             {
+#ifndef _WIN32
+                StringBuffer output;
+                VStringBuffer command("ccdcache %s -t %u", cacheFileName.str(), cacheWarmTraceLevel);
+                unsigned retcode = runExternalCommand(nullptr, output, output, command, nullptr);
+                if (output.length())
+                {
+                    StringArray outputLines;
+                    outputLines.appendList(output, "\n");
+                    ForEachItemIn(idx, outputLines)
+                    {
+                        const char *line = outputLines.item(idx);
+                        if (line && *line)
+                            DBGLOG("ccdcache: %s", line);
+                    }
+                }
+                if (retcode)
+                    DBGLOG("ccdcache failed with exit code %u", retcode);
+#endif
                 cacheInfo.loadFile(cacheFileName, false);
-                warmOsCache(cacheInfo);
                 if (traceLevel)
-                    DBGLOG("Loaded cache information from %s for channel %d", cacheFileName.str(), channel);
+                    DBGLOG("Loading cache information from %s for channel %d", cacheFileName.str(), channel);
+                warmOsCache(cacheInfo);
             }
         }
         catch(IException *E)
@@ -1863,163 +1903,10 @@ public:
     {
         if (!cacheInfo)
             return;
-#ifndef _WIN32
-        size_t os_page_size = getpagesize();
-#endif
-        char t = 0;
-        unsigned touched = 0;
-        unsigned preloaded = 0;
-        Owned<const ITopologyServer> topology = getTopology();
-        while (*cacheInfo)
-        {
-            // We are parsing lines that look like:
-            // <channel>|<filename>|<pagelist>
-            //
-            // Where pagelist is a space-separated list of page numbers or (inclusive) ranges.
-            // A page number or range prefixed by a * means that the page(s) was found in the jhtree cache.
-            //
-            // For example,
-            // 1|/var/lib/HPCCSystems/hpcc-data/unknown/regress/multi/dg_index_evens._1_of_3|*0 3-4
-            // Pages are always recorded and specified as 8192 bytes (unless pagebits ever changes).
-
-            unsigned fileChannel = strtoul(cacheInfo, (char **) &cacheInfo, 10);
-            if (*cacheInfo != '|')
-                break;
-            if (!topology->implementsChannel(fileChannel))
-            {
-                const char *eol = strchr(cacheInfo, '\n');
-                if (!eol)
-                    break;
-                cacheInfo = eol+1;
-                continue;
-            }
-            cacheInfo++;
-            const char *endName = strchr(cacheInfo, '|');
-            assert(endName);
-            if (!endName)
-                break;
-            StringBuffer fileName(endName-cacheInfo, cacheInfo);
-            Owned<IKeyIndex> keyIndex;
-            bool keyFailed = false;
-            unsigned fileIdx = (unsigned) -1;
-#ifndef _WIN32
-            char *file_mmap = nullptr;
-            int fd = open(fileName, 0);
-            struct stat file_stat;
-            if (fd != -1)
-            {
-                fstat(fd, &file_stat);
-                file_mmap = (char *) mmap((void *)0, file_stat.st_size, PROT_READ, MAP_SHARED, fd, 0);
-                if (file_mmap == MAP_FAILED)
-                {
-                    DBGLOG("Failed to map file %s to pre-warm cache (error %d)", fileName.str(), errno);
-                    file_mmap = nullptr;
-                }
-            }
-            else if (traceLevel)
-            {
-                DBGLOG("Failed to open file %s to pre-warm cache (error %d)", fileName.str(), errno);
-            }
-#endif
-            // "fileName" is the filename that roxie would use if it copied the file locally.  This may not
-            // match the name of the actual file - e.g. if the file is local but in a different location.
-            Owned<ILazyFileIO> localFile = lookupLocalFile(fileName);
-            if (localFile)
-            {
-                fileIdx = localFile->getFileIdx();
-            }
-            cacheInfo = endName+1;  // Skip the |
-            while (*cacheInfo==' ')
-                cacheInfo++;
-            for (;;)
-            {
-                bool inNodeCache = (*cacheInfo=='*');
-                NodeType nodeType = NodeNone;
-                if (inNodeCache)
-                {
-                    cacheInfo++;
-                    switch (*cacheInfo)
-                    {
-                    case 'R': nodeType = NodeBranch; break;
-                    case 'L': nodeType = NodeLeaf; break;
-                    case 'B': nodeType = NodeBlob; break;
-                    default:
-                        throwUnexpectedX("Unknown node type");
-                    }
-                    cacheInfo++;
-                }
-                __uint64 startPage = readPage(cacheInfo);
-                __uint64 endPage;
-                if (*cacheInfo=='-')
-                {
-                    cacheInfo++;
-                    endPage = readPage(cacheInfo);
-                }
-                else
-                    endPage = startPage;
-                if (traceLevel > 8)
-                    DBGLOG("Touching %s %" I64F "x-%" I64F "x", fileName.str(), startPage, endPage);
-                offset_t startOffset = startPage << CacheInfoEntry::pageBits;
-                offset_t endOffset = (endPage+1) << CacheInfoEntry::pageBits;
-                if (inNodeCache && !keyFailed && localFile && !keyIndex)
-                {
-                    //Pass false for isTLK - it will be initialised from the index header
-                    keyIndex.setown(createKeyIndex(fileName, localFile->getCrc(), *localFile.get(), fileIdx, false));
-                    if (!keyIndex)
-                        keyFailed = true;
-                }
-                if (inNodeCache && keyIndex)
-                {
-                    // Round startOffset up to nearest multiple of index node size
-                    unsigned nodeSize = keyIndex->getNodeSize();
-                    startOffset = ((startOffset+nodeSize-1)/nodeSize)*nodeSize;
-                    do
-                    {
-                        bool loaded = keyIndex->prewarmPage(startOffset, nodeType);
-                        if (!loaded)
-                            break;
-                        preloaded++;
-                        startOffset += nodeSize;
-                    }
-                    while (startOffset < endOffset);
-                }
-#ifndef _WIN32
-                else if (file_mmap)
-                {
-                    if (fileIdx != (unsigned) -1)
-                        noteRead(fileIdx, startOffset, (endOffset-1) - startOffset);  // Ensure pages we prewarm are recorded in our cache tracker
-                    do
-                    {
-                        if (startOffset >= (offset_t) file_stat.st_size)
-                            break;    // Let's not core if the file has changed size since we recorded the info...
-                        t += file_mmap[startOffset];  // NOTE - t reported below so it cannot be optimized out
-                        touched++;
-                        startOffset += os_page_size;
-                    }
-                    while (startOffset < endOffset);
-                }
-#endif
-                if (*cacheInfo != ' ')
-                    break;
-                cacheInfo++;
-            }
-#ifndef _WIN32
-            if (file_mmap)
-                munmap(file_mmap, file_stat.st_size);
-            if (fd != -1)
-                close(fd);
-#endif
-            if (*cacheInfo != '\n')
-                break;
-            cacheInfo++;
-        }
-        assert(!*cacheInfo);
-        if (*cacheInfo)
-        {
-            DBGLOG("WARNING: Unrecognized cacheInfo format at %.20s", cacheInfo);
-        }
-        if (traceLevel)
-            DBGLOG("Touched %d pages, preloaded %d index nodes, result %d", touched, preloaded, t);  // We report t to make sure that compiler doesn't decide to optimize it away entirely
+        IndexCacheWarmer warmer(this);
+        if (!::warmOsCache(cacheInfo, &warmer))
+            DBGLOG("WARNING: Unrecognized cacheInfo format");
+        warmer.report();
     }
 
     virtual void clearOsCache() override
@@ -2305,7 +2192,7 @@ public:
     {
         if (part > numParts || part == 0)
         {
-            throw MakeStringException(ROXIE_FILE_ERROR, "Internal error - requesting base for non-existant file part %d (valid are 1-%d)", part, numParts);
+            throw MakeStringException(ROXIE_FILE_ERROR, "Internal error - requesting base for non-existent file part %d (valid are 1-%d)", part, numParts);
         }
         return map[part-1].base;
     }

+ 1 - 0
roxie/ccd/ccdfile.hpp

@@ -75,6 +75,7 @@ interface IRoxieFileCache : extends IInterface
     virtual void loadSavedOsCacheInfo() = 0;
     virtual void noteRead(unsigned fileIdx, offset_t pos, unsigned len) = 0;
     virtual void startCacheReporter() = 0;
+    virtual ILazyFileIO *lookupLocalFile(const char *filename) = 0;
 };
 
 interface IDiffFileInfoCache : extends IInterface

+ 73 - 0
roxie/ccdcache/CMakeLists.txt

@@ -0,0 +1,73 @@
+################################################################################
+#    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+################################################################################
+
+
+project( ccdcache ) 
+
+set (   SRCS 
+        ccdcache.cpp
+    )
+
+include_directories ( 
+         .
+         ${HPCC_SOURCE_DIR}/roxie/ccd
+         ${HPCC_SOURCE_DIR}/fs/dafsclient
+         ${HPCC_SOURCE_DIR}/system/jhtree
+         ${HPCC_SOURCE_DIR}/system/mp
+         ${HPCC_SOURCE_DIR}/common/workunit
+         ${HPCC_SOURCE_DIR}/roxie/udplib
+         ${HPCC_SOURCE_DIR}/roxie/roxie
+         ${HPCC_SOURCE_DIR}/common/environment
+         ${HPCC_SOURCE_DIR}/ecl/hthor
+         ${HPCC_SOURCE_DIR}/ecl/schedulectrl
+         ${HPCC_SOURCE_DIR}/rtl/nbcd
+         ${HPCC_SOURCE_DIR}/common/deftype
+         ${HPCC_SOURCE_DIR}/system/include
+         ${HPCC_SOURCE_DIR}/dali/base
+         ${HPCC_SOURCE_DIR}/dali/dfu
+         ${HPCC_SOURCE_DIR}/roxie/roxiemem
+         ${HPCC_SOURCE_DIR}/common/dllserver
+         ${HPCC_SOURCE_DIR}/system/jlib
+         ${HPCC_SOURCE_DIR}/common/thorhelper
+         ${HPCC_SOURCE_DIR}/rtl/eclrtl
+         ${HPCC_SOURCE_DIR}/rtl/include
+         ${HPCC_SOURCE_DIR}/testing/unittests
+         ${HPCC_SOURCE_DIR}/dali/ft
+         ${HPCC_SOURCE_DIR}/system/security/shared
+         ${HPCC_SOURCE_DIR}/system/security/securesocket
+         ${HPCC_SOURCE_DIR}/system/libbase58
+    )
+
+if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG)
+  SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-switch -Wno-unused-parameter -Werror -Wno-delete-non-virtual-dtor -Wno-overloaded-virtual")
+  if (CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.0.0")
+    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess")
+  endif()
+endif()
+
+ADD_DEFINITIONS( -D_USRDLL -D_STANDALONE_CCDCACHE )
+
+if (CMAKE_COMPILER_IS_CLANGXX)
+  SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-switch-enum -Wno-format-security -Werror=reorder")
+endif()
+
+HPCC_ADD_EXECUTABLE ( ccdcache ${SRCS} )
+install ( TARGETS ccdcache RUNTIME DESTINATION ${EXEC_DIR} )
+
+target_link_libraries ( ccdcache
+         jlib
+    )
+

+ 323 - 0
roxie/ccdcache/ccdcache.cpp

@@ -0,0 +1,323 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#include "jlib.hpp"
+#include "jfile.hpp"
+#include "jhtree.hpp"
+#include "ctfile.hpp"
+#include "ccdfile.hpp"
+#include "ccdcache.hpp"
+
+#ifdef _STANDALONE_CCDCACHE
+#if defined(__linux__) || defined(__APPLE__)
+#include <sys/mman.h>
+#endif
+#include <setjmp.h>
+#include <signal.h>
+#endif
+
+// Parse an unsigned hexadecimal number (upper or lower case digits, no prefix) starting at _t.
+// On return _t points at the first character that is not a hex digit; if there are no hex
+// digits at all, _t is left unchanged and 0 is returned.
+static unsigned __int64 readPage(const char * &_t)
+{
+    const char *cursor = _t;
+    unsigned __int64 value = 0;
+    for (;; cursor++)
+    {
+        const char ch = *cursor;
+        unsigned digit;
+        if (ch >= '0' && ch <= '9')
+            digit = ch - '0';
+        else if (ch >= 'a' && ch <= 'f')
+            digit = ch - 'a' + 10;
+        else if (ch >= 'A' && ch <= 'F')
+            digit = ch - 'A' + 10;
+        else
+            break;   // Not a hex digit - the number ends here
+        value = value * 16 + digit;
+    }
+    _t = cursor;
+    return value;
+}
+
+// Note that warmOsCache is called twice for each cacheInfo file - once via separate process to touch pages into linux page cache,
+// and once within the Roxie process to preload the index cache and initialize the cache info structure for future cache reports.
+
+// Parse the text of a cacheInfo file, reporting every file and page range found to the callback.
+//
+//   cacheInfo - null-terminated contents of the cacheInfo file (null is treated as empty)
+//   callback  - receives startFile(), then warmBlock() per page or range, then endFile() for each line
+//
+// Returns true if the whole buffer was parsed, false if parsing stopped at content that was not
+// recognized. An unknown node type letter is reported via throwUnexpectedX; endFile() is still
+// called for the file in progress so the callback can release whatever startFile() acquired.
+bool warmOsCache(const char *cacheInfo, ICacheWarmer *callback)
+{
+    if (!cacheInfo)
+        return true;
+    while (*cacheInfo)
+    {
+        // We are parsing lines that look like:
+        // <channel>|<filename>|<pagelist>
+        //
+        // Where pagelist is a space-separated list of (hexadecimal) page numbers or (inclusive) ranges.
+        // A page number or range prefixed by a * means that the page(s) was found in the jhtree cache;
+        // the * is followed by a letter giving the node type: R (branch), L (leaf) or B (blob).
+        //
+        // For example,
+        // 1|/var/lib/HPCCSystems/hpcc-data/unknown/regress/multi/dg_index_evens._1_of_3|*R0 3-4
+        // Pages are always recorded and specified as 8192 bytes (unless pagebits ever changes).
+
+        strtoul(cacheInfo, (char **) &cacheInfo, 10);  // Skip fileChannel - we don't care
+        if (*cacheInfo != '|')
+            break;
+        cacheInfo++;
+        const char *endName = strchr(cacheInfo, '|');
+        assert(endName);
+        if (!endName)
+            break;
+        StringBuffer fileName(endName-cacheInfo, cacheInfo);
+        callback->startFile(fileName.str());
+        try
+        {
+            cacheInfo = endName+1;  // Skip the |
+            while (*cacheInfo==' ')
+                cacheInfo++;
+            for (;;)
+            {
+                bool inNodeCache = (*cacheInfo=='*');
+                NodeType nodeType = NodeNone;
+                if (inNodeCache)
+                {
+                    cacheInfo++;
+                    switch (*cacheInfo)
+                    {
+                    case 'R': nodeType = NodeBranch; break;
+                    case 'L': nodeType = NodeLeaf; break;
+                    case 'B': nodeType = NodeBlob; break;
+                    default:
+                        throwUnexpectedX("Unknown node type");
+                    }
+                    cacheInfo++;
+                }
+                __uint64 startPage = readPage(cacheInfo);
+                __uint64 endPage;
+                if (*cacheInfo=='-')
+                {
+                    cacheInfo++;
+                    endPage = readPage(cacheInfo);
+                }
+                else
+                    endPage = startPage;
+                // Convert the inclusive page range into a byte range with an exclusive end
+                offset_t startOffset = startPage << CacheInfoEntry::pageBits;
+                offset_t endOffset = (endPage+1) << CacheInfoEntry::pageBits;
+                if (!callback->warmBlock(fileName.str(), nodeType, startOffset, endOffset))
+                {
+                    // The callback has given up on this file - skip the rest of its line
+                    while (*cacheInfo && *cacheInfo != '\n')
+                        cacheInfo++;
+                    break;
+                }
+                if (*cacheInfo != ' ')
+                    break;
+                cacheInfo++;
+            }
+        }
+        catch (...)
+        {
+            // Let the callback release per-file resources before the exception propagates
+            callback->endFile();
+            throw;
+        }
+        callback->endFile();
+        if (*cacheInfo != '\n')
+            break;
+        cacheInfo++;
+    }
+    assert(!*cacheInfo);
+    return(*cacheInfo == '\0');
+}
+
+#ifdef _STANDALONE_CCDCACHE
+// See example code at https://github.com/sublimehq/mmap-example/blob/master/read_mmap.cc
+
+thread_local volatile bool sigbus_jmp_set;
+thread_local sigjmp_buf sigbus_jmp_buf;
+
+// SIGBUS handler. If the current thread has armed a jump point (sigbus_jmp_set), disarm it and
+// jump back to the matching sigsetjmp, which then returns the signal number. Otherwise do nothing.
+static void handle_sigbus(int c)
+{
+    // Nothing to do unless a jump point has been set on this thread
+    if (!sigbus_jmp_set)
+        return;
+    sigbus_jmp_set = false;
+
+    // Leave the signal handler via siglongjmp, handing the signal number back to sigsetjmp
+    siglongjmp(sigbus_jmp_buf, c);
+}
+
+// Install handle_sigbus as the process-wide handler for SIGBUS.
+// A failure to install the handler is reported on stdout but is not fatal - warming still
+// proceeds, just without protection against SIGBUS.
+static void install_signal_handlers()
+{
+    // Zero-initialise so that no field of the structure is passed to the kernel uninitialised
+    struct sigaction act = {};
+    act.sa_handler = &handle_sigbus;
+
+    // SA_NODEFER is required due to siglongjmp
+    act.sa_flags = SA_NODEFER;
+    sigemptyset(&act.sa_mask); // Don't block any signals
+
+    // Connect the signal
+    if (sigaction(SIGBUS, &act, nullptr) != 0)
+        printf("Failed to install SIGBUS handler (error %d)\n", errno);
+}
+
+static bool testErrors = false;
+static bool includeInCacheIndexes = false;
+static size_t os_page_size = getpagesize();
+
+// Cache warmer used by the standalone ccdcache tool. Each file is memory-mapped and one byte is
+// read from every OS page in the requested ranges, so that the pages are faulted in.
+// A SIGBUS raised while touching a mapping is caught (see handle_sigbus) and reported, and the
+// rest of that file is skipped.
+class StandaloneCacheWarmer : implements ICacheWarmer
+{
+    unsigned traceLevel;
+    unsigned filesTouched = 0;
+    unsigned pagesTouched = 0;
+    char *file_mmap = nullptr;   // Mapping of the current file, or nullptr if it could not be mapped
+    int fd = -1;                 // Descriptor of the current file, or -1
+    struct stat file_stat;       // Only meaningful while file_mmap is non-null
+    char dummy = 0;              // Accumulates the bytes read so the reads cannot be optimized away
+
+    // Read one byte per OS page in [startOffset, endOffset), stopping at the end of the file.
+    void warmRange(offset_t startOffset, offset_t endOffset)
+    {
+        do
+        {
+            if (startOffset >= (offset_t) file_stat.st_size)
+                break;    // Let's not core if the file has changed size since we recorded the info...
+            dummy += file_mmap[startOffset];
+            if (testErrors)
+                raise(SIGBUS);
+            pagesTouched++;
+            startOffset += os_page_size;
+        }
+        while (startOffset < endOffset);
+    }
+public:
+    StandaloneCacheWarmer(unsigned _traceLevel) : traceLevel(_traceLevel) {}
+
+    // Open and map the named file. On any failure file_mmap is left null, which makes warmBlock()
+    // return false; a descriptor that was opened is closed by endFile().
+    virtual void startFile(const char *filename) override
+    {
+        file_mmap = nullptr;
+        fd = open(filename, 0);
+        if (fd == -1)
+        {
+            if (traceLevel)
+                printf("Failed to open file %s to pre-warm cache (error %d)\n", filename, errno);
+            return;
+        }
+        // Without a valid size we cannot map the file - never pass an uninitialised st_size to mmap
+        if (fstat(fd, &file_stat) != 0)
+        {
+            printf("Failed to stat file %s to pre-warm cache (error %d)\n", filename, errno);
+            return;
+        }
+        void *mapped = mmap((void *)0, file_stat.st_size, PROT_READ, MAP_SHARED, fd, 0);
+        if (mapped == MAP_FAILED)
+        {
+            printf("Failed to map file %s to pre-warm cache (error %d)\n", filename, errno);
+            return;
+        }
+        file_mmap = (char *) mapped;
+        filesTouched++;
+    }
+
+    // Touch the pages in [startOffset, endOffset) of the current file.
+    // Index node ranges (nodeType != NodeNone) are skipped unless -i was specified.
+    // Returns false if the file is not mapped or a SIGBUS was caught, so that the caller skips
+    // the remaining ranges for this file.
+    virtual bool warmBlock(const char *filename, NodeType nodeType, offset_t startOffset, offset_t endOffset) override
+    {
+        if (!includeInCacheIndexes && nodeType != NodeNone)
+            return true;
+        if (traceLevel > 8)
+            printf("Touching %s %" I64F "x-%" I64F "x\n", filename, startOffset, endOffset);
+        if (file_mmap)
+        {
+            sigbus_jmp_set = true;
+            if (sigsetjmp(sigbus_jmp_buf, 0) == 0)
+            {
+                warmRange(startOffset, endOffset);
+            }
+            else
+            {
+                // We get here via siglongjmp from handle_sigbus
+                if (traceLevel)
+                    printf("SIGBUS caught while trying to touch file %s at offset %" I64F "x\n", filename, startOffset);
+                sigbus_jmp_set = false;
+                return false;
+            }
+            sigbus_jmp_set = false;
+            return true;
+        }
+        else
+            return false;
+    }
+
+    // Unmap and close the current file, if any.
+    virtual void endFile() override
+    {
+        if (file_mmap)
+            munmap(file_mmap, file_stat.st_size);
+        if (fd != -1)
+            close(fd);
+        fd = -1;
+        file_mmap = nullptr;
+    }
+
+    // Print a summary of the work done (only when tracing is enabled).
+    virtual void report() override
+    {
+        if (traceLevel)
+            printf("Touched %u pages from %u files (dummyval %u)\n", pagesTouched, filesTouched, dummy);  // We report dummy to make sure that compiler doesn't decide to optimize it away entirely
+    }
+};
+
+// Print the command line help on stdout and terminate the process with exit code 2.
+static void usage()
+{
+    fputs("Usage: ccdcache <options> filename\n"
+          "Options:\n"
+          "  -t  traceLevel\n"
+          "  -i  Include in-cache index files too\n",
+          stdout);
+    exit(2);
+}
+
+// Entry point of the standalone ccdcache tool.
+//
+// Usage: ccdcache <options> filename
+//   -t, --traceLevel <n>           set the trace level (default 1)
+//   -e, --testErrors               raise SIGBUS after each page touched, to exercise the error handling
+//   -i, --includecachedindexes     also touch pages recorded as being in the index cache
+//
+// Exits with code 2 (via usage()) if the arguments are invalid, otherwise returns 0. A cacheInfo
+// file that does not exist is silently ignored; exceptions are logged and swallowed.
+int main(int argc, const char **argv)
+{
+    if (argc < 2)
+        usage();
+    int arg = 1;
+    const char *cacheFileName = nullptr;
+    unsigned traceLevel = 1;
+    while (arg < argc)
+    {
+        if (streq(argv[arg], "-t") || streq(argv[arg], "--traceLevel"))
+        {
+            arg++;
+            if (arg == argc)
+                usage();
+            traceLevel = atoi(argv[arg]);
+        }
+        else if (streq(argv[arg], "-e") || streq(argv[arg], "--testErrors"))
+        {
+            testErrors = true;
+        }
+        else if (streq(argv[arg], "-i") || streq(argv[arg], "--includecachedindexes"))
+        {
+            includeInCacheIndexes = true;
+        }
+        else if (*(argv[arg]) == '-' || cacheFileName != nullptr)
+            usage();
+        else
+            cacheFileName = argv[arg];
+        arg++;
+    }
+    // Options alone are not enough - without this check a null filename would be passed on below
+    if (!cacheFileName)
+        usage();
+    StringBuffer cacheInfo;
+    install_signal_handlers();
+    StandaloneCacheWarmer warmer(traceLevel);
+    try
+    {
+        if (checkFileExists(cacheFileName))
+        {
+            if (traceLevel)
+                printf("Loading cache information from %s\n", cacheFileName);
+            cacheInfo.loadFile(cacheFileName, false);
+            if (!warmOsCache(cacheInfo, &warmer))
+                printf("WARNING: Unrecognized cacheInfo format in file %s\n", cacheFileName);
+            warmer.report();
+        }
+    }
+    catch(IException *E)
+    {
+        EXCLOG(E);
+        E->Release();
+    }
+    return 0;
+}
+#endif
+

+ 83 - 0
roxie/ccdcache/ccdcache.hpp

@@ -0,0 +1,83 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#ifndef _CCDCACHE_INCL
+#define _CCDCACHE_INCL
+
+struct CacheInfoEntry   // One (file, page, page type) tuple packed into a single 64-bit value
+{
+    //For convenience the values for PageType match the NodeX enumeration (see noteWarm).
+    //Ensure disk entries sort last so that index nodes take precedence when deduping offsets.
+    enum PageType : unsigned
+    {
+        PageTypeBranch = 0,
+        PageTypeLeaf = 1,
+        PageTypeBlob = 2,
+        PageTypeDisk = 3,
+    };
+
+    union
+    {
+        struct
+        {
+#ifndef _WIN32
+            unsigned type: 2;    // disk or the kind of index node
+            __uint64 page: 38;   // Page number in units of 2^pageBits bytes - supports file sizes up to 2^51 i.e. 2PB
+            unsigned file: 24;   // Up to 16 million files (2^24)
+#else
+//Windows does not like packing bitfields with different base types - fails the static assert
+            __uint64 type: 2;    // disk or the kind of index node
+            __uint64 page: 38;   // Page number in units of 2^pageBits bytes - supports file sizes up to 2^51 i.e. 2PB
+            __uint64 file: 24;   // Up to 16 million files (2^24)
+#endif
+        } b;
+        __uint64 u;              // The same 64 bits viewed as one integer - this is what the comparison operators use
+    };
+
+#ifndef _WIN32
+    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
+#elif _MSC_VER >= 1900
+    //Older versions of the windows compiler complain CacheInfoEntry::b is not a type name
+    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
+#endif
+
+    inline CacheInfoEntry() { u = 0; }
+    inline CacheInfoEntry(unsigned _file, offset_t _pos, PageType pageType)   // _pos is a byte offset within the file
+    {
+        b.file = _file;
+        b.page = _pos >> pageBits;   // Convert the byte offset into a page number
+        b.type = pageType;
+    }
+    inline bool operator < ( const CacheInfoEntry &l) const { return u < l.u; }
+    inline bool operator <= ( const CacheInfoEntry &l) const { return u <= l.u; }
+    inline void operator++ () { b.page++; }   // Advance to the next page
+
+    static constexpr unsigned pageBits = 13;  // 8k 'pages'
+};
+
+interface ICacheWarmer   // Callback used by warmOsCache() to report the contents of a cacheInfo file
+{
+    virtual void startFile(const char *filename) = 0;   // Called once per cacheInfo line, before any warmBlock() call for that file
+    virtual bool warmBlock(const char *fileName, NodeType nodeType, offset_t startOffset, offset_t endOffset) = 0;   // Called per page or page range with byte offsets (endOffset exclusive); nodeType is NodeNone unless the entry was flagged as an index node. Return false to skip the rest of the file's line
+    virtual void endFile() = 0;   // Called once the page list of the file started by startFile() has been processed
+    virtual void report() = 0;    // Not called by warmOsCache() itself - callers invoke it afterwards to report what was done
+};
+
+extern bool warmOsCache(const char *cacheInfo, ICacheWarmer *warmer);
+
+
+#endif //_CCDCACHE_INCL