
HPCC-25606 Improve cache behaviour for concurrent page requests

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 4 years ago
commit bc6582a6b6

+ 8 - 1
roxie/ccd/ccdfile.cpp

@@ -618,13 +618,20 @@ struct CacheInfoEntry
     {
         struct
         {
+#ifndef _WIN32
             unsigned type: 2;    // disk or the kind of index node
             __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
             unsigned file: 24;   // Up to 4 million files
+#else
+//Windows does not like packing bitfields with different base types - fails the static assert
+            __uint64 type: 2;    // disk or the kind of index node
+            __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
+            __uint64 file: 24;   // Up to 4 million files
+#endif
         } b;
         __uint64 u;
     };
-    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue");
+    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
 
     inline CacheInfoEntry() { u = 0; }
     inline CacheInfoEntry(unsigned _file, offset_t _pos, PageType pageType)
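
A minimal standalone sketch (not part of the commit) of the packing this hunk relies on: giving every bitfield the same 64-bit base type lets MSVC, gcc and clang all pack type/page/file into one 8-byte word, which is what the static_assert checks. The names here are hypothetical, and the 8KB page granularity (2^38 pages * 2^13 bytes = 2^51 bytes, i.e. the 2PB in the comment) is an inference from the field widths.

    #include <cstdint>
    #include <cstdio>

    union CacheEntrySketch                  // hypothetical stand-in for CacheInfoEntry
    {
        struct
        {
            uint64_t type: 2;               // disk page or the kind of index node
            uint64_t page: 38;              // page number within the file
            uint64_t file: 24;              // file id (up to 2^24 files)
        } b;
        uint64_t u;
    };
    static_assert(sizeof(CacheEntrySketch) == sizeof(uint64_t), "Unexpected packing issue");

    int main()
    {
        CacheEntrySketch e;
        e.u = 0;
        e.b.type = 1;                       // e.g. a leaf page
        e.b.page = 0x2468;                  // arbitrary page number
        e.b.file = 42;                      // arbitrary file id
        std::printf("packed: 0x%016llx\n", (unsigned long long)e.u);
        return 0;
    }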

+ 1 - 0
roxie/ccd/ccdmain.cpp

@@ -1114,6 +1114,7 @@ int CCD_API roxie_main(int argc, const char *argv[], const char * defaultYaml)
         setLeafCacheMem(leafCacheMB * 0x100000);
         blobCacheMB = topology->getPropInt("@blobCacheMem", 0);
         setBlobCacheMem(blobCacheMB * 0x100000);
+        setLegacyNodeCache(topology->getPropBool("@legacyNodeCache", false));
 
         unsigned __int64 affinity = topology->getPropInt64("@affinity", 0);
         updateAffinity(affinity);
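
The new setting defaults to false, so the new locking scheme is used unless explicitly overridden. A hedged sketch of what the override might look like in a legacy RoxieTopology configuration, alongside the cache-size attributes read just above (element name and the other attributes are illustrative only):

    <RoxieTopology blobCacheMem="64" leafCacheMem="200" legacyNodeCache="1">
      ...
    </RoxieTopology>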

+ 7 - 2
roxie/ccd/ccdsnmp.cpp

@@ -384,12 +384,17 @@ CRoxieMetricsManager::CRoxieMetricsManager()
     addMetric(nodesLoaded, 1000);
     addMetric(cacheHits, 1000);
     addMetric(cacheAdds, 1000);
+
+    addMetric(blobCacheHits, 1000);
+    addMetric(blobCacheAdds, 1000);
+    addMetric(blobCacheDups, 1000);
     addMetric(leafCacheHits, 1000);
     addMetric(leafCacheAdds, 1000);
+    addMetric(leafCacheDups, 1000);
     addMetric(nodeCacheHits, 1000);
     addMetric(nodeCacheAdds, 1000);
-    addMetric(preloadCacheHits, 0);
-    addMetric(preloadCacheAdds, 0);
+    addMetric(nodeCacheDups, 1000);
+
     addMetric(unwantedDiscarded, 1000);
 
     addMetric(getHeapAllocated, 0);

+ 6 - 0
roxie/ccd/ccdstate.cpp

@@ -2372,6 +2372,12 @@ private:
                 topology->setPropInt("@leafCacheMem", leafCacheMB);
                 setLeafCacheMem(leafCacheMB * 0x100000);
             }
+            else if (stricmp(queryName, "control:legacyNodeCache")==0)
+            {
+                bool legacyNodeCache = control->getPropBool("@val", true);
+                topology->setPropInt("@legacyNodeCache", legacyNodeCache);
+                setLegacyNodeCache(legacyNodeCache);
+            }
             else if (stricmp(queryName, "control:listFileOpenErrors")==0)
             {
                 // this just creates a delta state file to remove references to Keys / Files we no longer have interest in
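
For completeness, control commands of this shape are normally delivered to Roxie as a small XML request (e.g. via testsocket); a hedged sketch of switching back to the new behaviour at runtime, using the @val attribute the handler reads (note the handler defaults to true when @val is omitted):

    <control:legacyNodeCache val='0'/>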

+ 4 - 1
system/jhtree/ctfile.hpp

@@ -193,11 +193,12 @@ class jhtree_decl CNodeBase : public CWritableKeyNode
 {
 protected:
     NodeHdr hdr;
-    byte keyType;
     size32_t keyLen;
     size32_t keyCompareLen;
     CKeyHdr *keyHdr;
+    byte keyType;
     bool isVariable;
+    std::atomic<bool> ready{false}; // is this node ready for use?  Can be checked outside a critsec, but only set within one.
 
 private:
     offset_t fpos;
@@ -213,6 +214,8 @@ public:
     inline bool isLeaf() const { return hdr.leafFlag != NodeBranch; }       // actually is-non-branch.  Use should be reviewed.
     inline NodeType getNodeType() const { return (NodeType)hdr.leafFlag; }
 
+    inline bool isReady() const { return ready; }
+    inline void noteReady() { ready = true; }
 public:
     CNodeBase();
     void load(CKeyHdr *keyHdr, offset_t fpos);
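
The ready flag added here follows the usual double-checked pattern hinted at in its comment: readers may test it without holding a lock, but it is only set while the loader holds a critical section, after the node's contents are fully populated. A minimal sketch of that idiom, with hypothetical names and std::mutex standing in for jlib's CriticalSection:

    #include <atomic>
    #include <mutex>

    struct NodeSketch
    {
        std::atomic<bool> ready{false};
        int payload = 0;                        // stands in for the decoded node data

        void ensureLoaded(std::mutex &loadLock)
        {
            if (ready.load())                   // cheap check outside any lock
                return;
            std::lock_guard<std::mutex> block(loadLock);
            if (!ready.load())                  // re-check: another thread may have finished the load
            {
                payload = 42;                   // ...the expensive load/decompress would happen here...
                ready.store(true);              // publish only after the data is complete
            }
        }
    };

    int main()
    {
        std::mutex loadLock;
        NodeSketch node;
        node.ensureLoaded(loadLock);            // first caller performs the load
        node.ensureLoaded(loadLock);            // later callers see ready and return immediately
        return 0;
    }

The default sequentially-consistent ordering on the atomic is enough to publish the payload safely; release/acquire would also suffice.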

+ 210 - 166
system/jhtree/jhtree.cpp

@@ -598,13 +598,9 @@ typedef OwningSimpleHashTableOf<CNodeMapping, CKeyIdAndPos> CNodeTable;
 #define FIXED_NODE_OVERHEAD (sizeof(CJHTreeNode))
 class CNodeMRUCache : public CMRUCacheOf<CKeyIdAndPos, CJHTreeNode, CNodeMapping, CNodeTable>
 {
-    size32_t sizeInMem, memLimit;
+    std::atomic<size32_t> sizeInMem{0};
+    size32_t memLimit = 0;
 public:
-    CNodeMRUCache(size32_t _memLimit) : memLimit(0)
-    {
-        sizeInMem = 0;
-        setMemLimit(_memLimit);
-    }
     size32_t setMemLimit(size32_t _memLimit)
     {
         size32_t oldMemLimit = memLimit;
@@ -618,6 +614,12 @@ public:
         // remove LRU until !full
         do
         {
+            //Never evict an entry that hasn't yet been loaded - otherwise sizeInMem can become inconsistent
+            CNodeMapping *tail = mruList.tail();
+            assertex(tail);
+            if (!tail->queryElement().isReady())
+                break;
+
             clear(1);
         }
         while (full());
@@ -648,57 +650,76 @@ public:
             cacheInfo.noteWarm(key.keyId, key.pos, node.getNodeSize(), node.getNodeType());
         }
     }
+    void noteReady(CJHTreeNode &node)
+    {
+        //On the previous call (when the node was added) node.getMemSize() will have returned 0 because it had not been loaded
+        sizeInMem += node.getMemSize();
+    }
 };
 
+enum CacheType : unsigned
+{
+    CacheBranch = 0,
+    CacheLeaf = 1,
+    CacheBlob = 2,
+    //CacheTLK?
+    CacheMax = 3
+};
+static_assert((unsigned)CacheBranch == (unsigned)NodeBranch, "Mismatch Cache Branch");
+static_assert((unsigned)CacheLeaf == (unsigned)NodeLeaf, "Mismatch Cache Leaf");
+static_assert((unsigned)CacheBlob == (unsigned)NodeBlob, "Mismatch Cache Blob");
+
 class CNodeCache : public CInterface
 {
 private:
-    mutable CriticalSection lock;
-    CNodeMRUCache nodeCache;
-    CNodeMRUCache leafCache;
-    CNodeMRUCache blobCache;
-    bool cacheNodes; 
-    bool cacheLeaves;
-    bool cacheBlobs;
+    mutable CriticalSection lock[CacheMax];
+    CNodeMRUCache cache[CacheMax];
+    bool cacheEnabled[CacheMax] = { false, false, false };
+    bool legacyMode = false;
 public:
     CNodeCache(size32_t maxNodeMem, size32_t maxLeaveMem, size32_t maxBlobMem)
-        : nodeCache(maxNodeMem), leafCache(maxLeaveMem), blobCache(maxBlobMem)
     {
-        cacheNodes = maxNodeMem != 0;
-        cacheLeaves = maxLeaveMem != 0;;
-        cacheBlobs = maxBlobMem != 0;
+        setNodeCacheMem(maxNodeMem);
+        setLeafCacheMem(maxLeaveMem);
+        setBlobCacheMem(maxBlobMem);
         // note that each index caches the last blob it unpacked so that sequential blobfetches are still ok
     }
     CJHTreeNode *getNode(INodeLoader *key, unsigned keyID, offset_t pos, NodeType type, IContextLogger *ctx, bool isTLK);
     void getCacheInfo(ICacheInfoRecorder &cacheInfo);
 
+
     inline size32_t setNodeCacheMem(size32_t newSize)
     {
-        CriticalBlock block(lock);
-        unsigned oldV = nodeCache.setMemLimit(newSize);
-        cacheNodes = (newSize != 0);
-        return oldV;
+        return setCacheMem(newSize, CacheBranch);
     }
     inline size32_t setLeafCacheMem(size32_t newSize)
     {
-        CriticalBlock block(lock);
-        unsigned oldV = leafCache.setMemLimit(newSize);
-        cacheLeaves = (newSize != 0); 
-        return oldV;
+        return setCacheMem(newSize, CacheLeaf);
     }
     inline size32_t setBlobCacheMem(size32_t newSize)
     {
-        CriticalBlock block(lock);
-        unsigned oldV = blobCache.setMemLimit(newSize);
-        cacheBlobs = (newSize != 0); 
-        return oldV;
+        return setCacheMem(newSize, CacheBlob);
+    }
+    inline void setLegacyLocking(bool _value)
+    {
+        legacyMode = _value;
     }
     void clear()
     {
-        CriticalBlock block(lock);
-        nodeCache.kill();
-        leafCache.kill();
-        blobCache.kill();
+        for (unsigned i=0; i < CacheMax; i++)
+        {
+            CriticalBlock block(lock[i]);
+            cache[i].kill();
+        }
+    }
+
+protected:
+    size32_t setCacheMem(size32_t newSize, CacheType type)
+    {
+        CriticalBlock block(lock[type]);
+        unsigned oldV = cache[type].setMemLimit(newSize);
+        cacheEnabled[type] = (newSize != 0);
+        return oldV;
     }
 };
 
@@ -1014,7 +1035,7 @@ CMemKeyIndex::CMemKeyIndex(unsigned _iD, IMemoryMappedFile *_io, const char *_na
     init(hdr, isTLK);
 }
 
-CJHTreeNode *CMemKeyIndex::loadNode(offset_t pos)
+CJHTreeNode *CMemKeyIndex::loadNode(CJHTreeNode * optNode, offset_t pos)
 {
     nodesLoaded++;
     if (pos + keyHdr->getNodeSize() > io->fileSize())
@@ -1027,6 +1048,8 @@ CJHTreeNode *CMemKeyIndex::loadNode(offset_t pos)
     }
     char *nodeData = (char *) (io->base() + pos);
     MTIME_SECTION(queryActiveTimer(), "JHTREE read node");
+    if (optNode)
+        return CKeyIndex::loadNode(optNode, nodeData, pos, false);
     return CKeyIndex::loadNode(nodeData, pos, false);
 }
 
@@ -1046,7 +1069,7 @@ CDiskKeyIndex::CDiskKeyIndex(unsigned _iD, IFileIO *_io, const char *_name, bool
     init(hdr, isTLK);
 }
 
-CJHTreeNode *CDiskKeyIndex::loadNode(offset_t pos) 
+CJHTreeNode *CDiskKeyIndex::loadNode(CJHTreeNode * optNode, offset_t pos)
 {
     nodesLoaded++;
     unsigned nodeSize = keyHdr->getNodeSize();
@@ -1061,45 +1084,52 @@ CJHTreeNode *CDiskKeyIndex::loadNode(offset_t pos)
         EXCLOG(E, m.str());
         throw E;
     }
+    if (optNode)
+        return CKeyIndex::loadNode(optNode, nodeData, pos, true);
     return CKeyIndex::loadNode(nodeData, pos, true);
 }
 
-CJHTreeNode *CKeyIndex::loadNode(char *nodeData, offset_t pos, bool needsCopy) 
+CJHTreeNode *CKeyIndex::createNode(NodeType type)
+{
+    switch(type)
+    {
+    case NodeBranch:
+        return new CJHTreeNode();
+    case NodeLeaf:
+        if (keyHdr->isVariable())
+            return new CJHVarTreeNode();
+        else if (keyHdr->isRowCompressed())
+            return new CJHRowCompressedNode();
+        else
+            return new CJHTreeNode();
+    case NodeBlob:
+        return new CJHTreeBlobNode();
+    case NodeMeta:
+        return new CJHTreeMetadataNode();
+    case NodeBloom:
+        return new CJHTreeBloomTableNode();
+    default:
+        throwUnexpected();
+    }
+}
+
+CJHTreeNode *CKeyIndex::loadNode(char *nodeData, offset_t pos, bool needsCopy)
+{
+    char leafFlag = ((NodeHdr *) nodeData)->leafFlag;
+    Owned<CJHTreeNode> ret = createNode((NodeType)leafFlag);
+    loadNode(ret, nodeData, pos, needsCopy);
+    return ret.getClear();
+}
+
+CJHTreeNode * CKeyIndex::loadNode(CJHTreeNode * ret, char *nodeData, offset_t pos, bool needsCopy)
 {
     try
     {
-        Owned<CJHTreeNode> ret;
-        char leafFlag = ((NodeHdr *) nodeData)->leafFlag;
-        switch(leafFlag)
-        {
-        case NodeBranch:
-            ret.setown(new CJHTreeNode());
-            break;
-        case NodeLeaf:
-            if (keyHdr->isVariable())
-                ret.setown(new CJHVarTreeNode());
-            else if (keyHdr->isRowCompressed())
-                ret.setown(new CJHRowCompressedNode());
-            else
-                ret.setown(new CJHTreeNode());
-            break;
-        case NodeBlob:
-            ret.setown(new CJHTreeBlobNode());
-            break;
-        case NodeMeta:
-            ret.setown(new CJHTreeMetadataNode());
-            break;
-        case NodeBloom:
-            ret.setown(new CJHTreeBloomTableNode());
-            break;
-        default:
-            throwUnexpected();
-        }
         {
             MTIME_SECTION(queryActiveTimer(), "JHTREE load node");
             ret->load(keyHdr, nodeData, pos, needsCopy);
+            return ret;
         }
-        return ret.getClear();
     }
     catch (IException *E)
     {
@@ -2321,6 +2351,11 @@ extern jhtree_decl size32_t setBlobCacheMem(size32_t cacheSize)
     return queryNodeCache()->setBlobCacheMem(cacheSize);
 }
 
+extern jhtree_decl void setLegacyNodeCache(bool _value)
+{
+    return queryNodeCache()->setLegacyLocking(_value);
+}
+
 extern jhtree_decl void getNodeCacheInfo(ICacheInfoRecorder &cacheInfo)
 {
     // MORE - consider reporting root nodes of open IKeyIndexes too?
@@ -2333,12 +2368,24 @@ extern jhtree_decl void getNodeCacheInfo(ICacheInfoRecorder &cacheInfo)
 
 void CNodeCache::getCacheInfo(ICacheInfoRecorder &cacheInfo)
 {
-    CriticalBlock block(lock);
-    blobCache.reportEntries(cacheInfo);
-    nodeCache.reportEntries(cacheInfo);
-    leafCache.reportEntries(cacheInfo);
+    for (unsigned i = 0; i < CacheMax; i++)
+    {
+        CriticalBlock block(lock[i]);
+        cache[i].reportEntries(cacheInfo);
+    }
 }
 
+constexpr StatisticKind addStatId[CacheMax] = { StNumNodeCacheAdds, StNumLeafCacheAdds, StNumBlobCacheAdds };
+constexpr StatisticKind hitStatId[CacheMax] = { StNumNodeCacheHits, StNumLeafCacheHits, StNumBlobCacheHits };
+constexpr RelaxedAtomic<unsigned> * hitMetric[CacheMax] = { &nodeCacheHits, &leafCacheHits, &blobCacheHits };
+constexpr RelaxedAtomic<unsigned> * addMetric[CacheMax] = { &nodeCacheAdds, &leafCacheAdds, &blobCacheAdds };
+constexpr RelaxedAtomic<unsigned> * dupMetric[CacheMax] = { &nodeCacheDups, &leafCacheDups, &blobCacheDups };
+
+//Rather than using a critical section in each node (which can be large and expensive) have an array which is indexed by a function
+//of the key id/file position
+constexpr unsigned numLoadCritSects = 64;
+static CriticalSection loadCs[numLoadCritSects];
+
 CJHTreeNode *CNodeCache::getNode(INodeLoader *keyIndex, unsigned iD, offset_t pos, NodeType type, IContextLogger *ctx, bool isTLK)
 {
     // MORE - could probably be improved - I think having the cache template separate is not helping us here
@@ -2346,122 +2393,117 @@ CJHTreeNode *CNodeCache::getNode(INodeLoader *keyIndex, unsigned iD, offset_t po
     if (!pos)
         return NULL;
 
-    CKeyIdAndPos key(iD, pos);
+    // No benefit in caching the following, especially since they will evict useful pages
+    if ((type == NodeMeta) || (type == NodeBloom))
+        return keyIndex->loadNode(pos);
+
     //NOTE: TLK leaf nodes are currently cached along with branches, not with leaves.  It might be better if this was a separate cache.
-    NodeType cacheType = isTLK ? NodeBranch : type;
+    CacheType cacheType = isTLK ? CacheBranch : (CacheType)type;
 
-    // No benefit in caching the following, especially since they will evict useful pages
-    // could also check cache[cacheType] if an array was used and avoid the tests in the critical section (change later)
-    if ((cacheType == NodeMeta) || (cacheType == NodeBloom))
-    {
+    // check cacheEnabled[cacheType] to avoid entering the critical section (and testing the flag within it)
+    if (unlikely(!cacheEnabled[cacheType]))
         return keyIndex->loadNode(pos);
-    }
 
-    CriticalBlock block(lock);
-    //TBD: Use arrays indexed by the caching type to simplify this code.
-    switch (cacheType)
+    //Legacy cache access:
+    //  Lock, check the cache, unlock.  Load the page.  Lock again, use the entry if it was added meanwhile, otherwise add it.
+    //New code:
+    //  Lock, add a placeholder if missing, unlock.  Lock a page-dependent critical section, load(), release that lock.
+    //There will be the same number of critical section locks, but loading a page will contend on a different lock - so it should reduce contention.
+    //There will be a limit on the number of nodes being loaded concurrently with the new code, where it was unlimited before, but
+    //nodes will only be loaded once.
+    CKeyIdAndPos key(iD, pos);
+    CriticalSection & cacheLock = lock[cacheType];
+    if (legacyMode)
     {
-    case NodeBranch:
-        if (cacheNodes)
+        CriticalBlock block(cacheLock);
+        CJHTreeNode *cacheNode = cache[cacheType].query(key);
+        if (likely(cacheNode))
         {
-            CJHTreeNode *cacheNode = nodeCache.query(key);
-            if (cacheNode)
-            {
-                cacheHits++;
-                if (ctx) ctx->noteStatistic(StNumNodeCacheHits, 1);
-                nodeCacheHits++;
-                return LINK(cacheNode);
-            }
+            cacheHits++;
+            if (ctx) ctx->noteStatistic(hitStatId[cacheType], 1);
+            (*hitMetric[cacheType])++;
+            return LINK(cacheNode);
         }
-        break;
-    case NodeLeaf:
-        if (cacheLeaves)
+
+        CJHTreeNode *node;
         {
-            CJHTreeNode *cacheNode = leafCache.query(key);
-            if (cacheNode)
-            {
-                cacheHits++;
-                if (ctx) ctx->noteStatistic(StNumLeafCacheHits, 1);
-                leafCacheHits++;
-                return LINK(cacheNode);
-            }
+            CriticalUnblock block(cacheLock);
+            node = keyIndex->loadNode(pos);  // NOTE - don't want cache locked while we load!
+            node->noteReady();
         }
-        break;
-    case NodeBlob:
-        if (cacheBlobs)
+
+        cacheAdds++;
+        cacheNode = cache[cacheType].query(key); // check if added to cache while we were reading
+        if (cacheNode)
         {
-            CJHTreeNode *cacheNode = blobCache.query(key);
-            if (cacheNode)
-            {
-                cacheHits++;
-                if (ctx) ctx->noteStatistic(StNumBlobCacheHits, 1);
-                blobCacheHits++;
-                return LINK(cacheNode);
-            }
+            ::Release(node);
+            cacheHits++;
+            if (ctx) ctx->noteStatistic(hitStatId[cacheType], 1);
+            (*hitMetric[cacheType])++;
+            (*dupMetric[cacheType])++;
+            return LINK(cacheNode);
         }
-        break;
+        if (ctx) ctx->noteStatistic(addStatId[cacheType], 1);
+        (*addMetric[cacheType])++;
+        cache[cacheType].add(key, *LINK(node));
+        return node;
     }
-    CJHTreeNode *node;
-    {
-        CriticalUnblock block(lock);
-        node = keyIndex->loadNode(pos);  // NOTE - don't want cache locked while we load!
-    }
-    cacheAdds++;
-    switch (cacheType)
+    else
     {
-    case NodeBranch:
-        if (cacheNodes)
+        CJHTreeNode * node;
+        bool alreadyExists = true;
         {
-            CJHTreeNode *cacheNode = nodeCache.query(key); // check if added to cache while we were reading
-            if (cacheNode)
+            CriticalBlock block(cacheLock);
+
+            node = cache[cacheType].query(key);
+            if (unlikely(!node))
             {
-                ::Release(node);
-                cacheHits++;
-                if (ctx) ctx->noteStatistic(StNumNodeCacheHits, 1);
-                nodeCacheHits++;
-                return LINK(cacheNode);
+                node = keyIndex->createNode(type);
+                assertex(node->getMemSize() == 0);   // check the reported size is 0 so that the updated size is correct
+                cache[cacheType].add(key, *node);
+                alreadyExists = false;
             }
-            if (ctx) ctx->noteStatistic(StNumNodeCacheAdds, 1);
-            nodeCacheAdds++;
-            nodeCache.add(key, *LINK(node));
+            node->Link();
         }
-        break;
-    case NodeLeaf:
-        if (cacheLeaves)
+
+        //Move the atomic increments out of the critical section - they can be relatively expensive
+        if (likely(alreadyExists))
         {
-            CJHTreeNode *cacheNode = leafCache.query(key); // check if added to cache while we were reading
-            if (cacheNode)
-            {
-                ::Release(node);
-                cacheHits++;
-                if (ctx) ctx->noteStatistic(StNumLeafCacheHits, 1);
-                leafCacheHits++;
-                return LINK(cacheNode);
-            }
-            if (ctx) ctx->noteStatistic(StNumLeafCacheAdds, 1);
-            leafCacheAdds++;
-            leafCache.add(key, *LINK(node));
+            cacheHits++;
+            if (ctx) ctx->noteStatistic(hitStatId[cacheType], 1);
+            (*hitMetric[cacheType])++;
         }
-        break;
-    case NodeBlob:
-        if (cacheBlobs)
+        else
         {
-            CJHTreeNode *cacheNode = blobCache.query(key); // check if added to cache while we were reading
-            if (cacheNode)
-            {
-                ::Release(node);
-                cacheHits++;
-                if (ctx) ctx->noteStatistic(StNumBlobCacheHits, 1);
-                blobCacheHits++;
-                return LINK(cacheNode);
-            }
-            if (ctx) ctx->noteStatistic(StNumBlobCacheAdds, 1);
-            blobCacheAdds++;
-            blobCache.add(key, *LINK(node));
+            cacheAdds++;
+            if (ctx) ctx->noteStatistic(addStatId[cacheType], 1);
+            (*addMetric[cacheType])++;
         }
-        break;
+
+        //The common case is that this flag has already been set (by a previous add).
+        if (likely(node->isReady()))
+            return node;
+
+        //Shame that the hash code is recalculated - it might be possible to remove this.
+        unsigned hashcode = hashc(reinterpret_cast<const byte *>(&key), sizeof(key), 0x811C9DC5);
+        unsigned whichCs = hashcode % numLoadCritSects;
+        Owned<CJHTreeNode> ownedNode(node); // ensure node gets cleaned up if it fails to load
+
+        //Actually load the information outside the cache's critical section
+        CriticalBlock loadBlock(loadCs[whichCs]);
+        if (!node->isReady())
+        {
+            keyIndex->loadNode(node, pos);
+
+            //Update the associated size of the entry in the hash table before setting isReady (never evicted until isReady is set)
+            cache[cacheType].noteReady(*node);
+            node->noteReady();
+        }
+        else
+            (*dupMetric[cacheType])++; // Would have previously loaded the page twice
+
+        return ownedNode.getClear();
     }
-    return node;
 }
 
 RelaxedAtomic<unsigned> cacheAdds;
@@ -2469,12 +2511,13 @@ RelaxedAtomic<unsigned> cacheHits;
 RelaxedAtomic<unsigned> nodesLoaded;
 RelaxedAtomic<unsigned> blobCacheHits;
 RelaxedAtomic<unsigned> blobCacheAdds;
+RelaxedAtomic<unsigned> blobCacheDups;
 RelaxedAtomic<unsigned> leafCacheHits;
 RelaxedAtomic<unsigned> leafCacheAdds;
+RelaxedAtomic<unsigned> leafCacheDups;
 RelaxedAtomic<unsigned> nodeCacheHits;
 RelaxedAtomic<unsigned> nodeCacheAdds;
-RelaxedAtomic<unsigned> preloadCacheHits;
-RelaxedAtomic<unsigned> preloadCacheAdds;
+RelaxedAtomic<unsigned> nodeCacheDups;
 
 void clearNodeStats()
 {
@@ -2483,12 +2526,13 @@ void clearNodeStats()
     nodesLoaded.store(0);
     blobCacheHits.store(0);
     blobCacheAdds.store(0);
+    blobCacheDups.store(0);
     leafCacheHits.store(0);
     leafCacheAdds.store(0);
+    leafCacheDups.store(0);
     nodeCacheHits.store(0);
     nodeCacheAdds.store(0);
-    preloadCacheHits.store(0);
-    preloadCacheAdds.store(0);
+    nodeCacheDups.store(0);
 }
 
 //------------------------------------------------------------------------------------------------
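
Putting the jhtree.cpp changes together: in the non-legacy path an empty node of the right type is added to the per-type MRU cache as a placeholder under the cache lock, the lock is dropped, and the actual read/decompress happens under one of 64 striped load locks chosen by hashing the key, so concurrent requests for the same page wait on that single lock and then find the node ready rather than each loading it. A compressed sketch of that shape, with hypothetical names (std containers and mutexes in place of jlib's CMRUCacheOf, CriticalSection and LINK/Release, with MRU eviction and metrics omitted):

    #include <atomic>
    #include <cstdint>
    #include <functional>
    #include <memory>
    #include <mutex>
    #include <unordered_map>

    struct NodeStub
    {
        std::atomic<bool> ready{false};
        size_t memSize = 0;                          // 0 until loaded, mirrors getMemSize()
    };

    using Key = uint64_t;                            // stands in for CKeyIdAndPos

    class NodeCacheSketch
    {
        std::mutex cacheLock;                        // one per cache type in the real code
        std::unordered_map<Key, std::shared_ptr<NodeStub>> table;
        std::atomic<size_t> sizeInMem{0};

        static constexpr unsigned numLoadLocks = 64; // striped locks for the loads themselves
        std::mutex loadLocks[numLoadLocks];

    public:
        std::shared_ptr<NodeStub> getNode(Key key, const std::function<void(NodeStub &)> &loadFromDisk)
        {
            std::shared_ptr<NodeStub> node;
            {
                std::lock_guard<std::mutex> block(cacheLock);
                auto &slot = table[key];
                if (!slot)                           // add an (unloaded) placeholder under the cache lock
                    slot = std::make_shared<NodeStub>();
                node = slot;                         // the real code bumps hit/add metrics outside the lock
            }

            if (node->ready)                         // common case: already loaded, no further locking
                return node;

            // Load under a lock chosen by the key, so unrelated pages do not contend with each other
            std::mutex &cs = loadLocks[std::hash<Key>{}(key) % numLoadLocks];
            std::lock_guard<std::mutex> loadBlock(cs);
            if (!node->ready)
            {
                loadFromDisk(*node);                 // expensive read/decompress outside the cache lock
                sizeInMem += node->memSize;          // account for the real size before publishing
                node->ready = true;                  // eviction skips entries until ready is set
            }
            return node;
        }
    };

    int main()
    {
        NodeCacheSketch cache;
        auto node = cache.getNode(0x1234, [](NodeStub &n) { n.memSize = 8192; });  // pretend 8KB page
        return node->ready ? 0 : 1;
    }

One consequence, also noted in the comment in getNode, is that at most numLoadLocks distinct pages can be loading at once, but each page is loaded exactly once.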

+ 4 - 2
system/jhtree/jhtree.hpp

@@ -150,6 +150,7 @@ extern jhtree_decl void clearNodeCache();
 extern jhtree_decl size32_t setNodeCacheMem(size32_t cacheSize);
 extern jhtree_decl size32_t setLeafCacheMem(size32_t cacheSize);
 extern jhtree_decl size32_t setBlobCacheMem(size32_t cacheSize);
+extern jhtree_decl void setLegacyNodeCache(bool _value);
 
 extern jhtree_decl void getNodeCacheInfo(ICacheInfoRecorder &cacheInfo);
 
@@ -170,12 +171,13 @@ extern jhtree_decl RelaxedAtomic<unsigned> cacheHits;
 extern jhtree_decl RelaxedAtomic<unsigned> cacheAdds;
 extern jhtree_decl RelaxedAtomic<unsigned> blobCacheHits;
 extern jhtree_decl RelaxedAtomic<unsigned> blobCacheAdds;
+extern jhtree_decl RelaxedAtomic<unsigned> blobCacheDups;
 extern jhtree_decl RelaxedAtomic<unsigned> leafCacheHits;
 extern jhtree_decl RelaxedAtomic<unsigned> leafCacheAdds;
+extern jhtree_decl RelaxedAtomic<unsigned> leafCacheDups;
 extern jhtree_decl RelaxedAtomic<unsigned> nodeCacheHits;
 extern jhtree_decl RelaxedAtomic<unsigned> nodeCacheAdds;
-extern jhtree_decl RelaxedAtomic<unsigned> preloadCacheHits;
-extern jhtree_decl RelaxedAtomic<unsigned> preloadCacheAdds;
+extern jhtree_decl RelaxedAtomic<unsigned> nodeCacheDups;
 extern jhtree_decl bool linuxYield;
 extern jhtree_decl bool traceSmartStepping;
 extern jhtree_decl bool flushJHtreeCacheOnOOM;

+ 10 - 4
system/jhtree/jhtree.ipp

@@ -65,9 +65,12 @@ enum request { LTE, GTE };
 // INodeLoader impl.
 interface INodeLoader
 {
-    virtual CJHTreeNode *loadNode(offset_t offset) = 0;
+    virtual CJHTreeNode * createNode(NodeType type) = 0;
+    virtual CJHTreeNode *loadNode(CJHTreeNode * optNode, offset_t offset) = 0;
     virtual CJHTreeNode *locateFirstNode(KeyStatsCollector &stats) = 0;
     virtual CJHTreeNode *locateLastNode(KeyStatsCollector &stats) = 0;
+
+    inline CJHTreeNode *loadNode(offset_t offset) { return loadNode(nullptr, offset); }
 };
 
 class jhtree_decl CKeyIndex : implements IKeyIndex, implements INodeLoader, public CInterface
@@ -93,6 +96,8 @@ protected:
     RelaxedAtomic<unsigned> keyScans;
     offset_t latestGetNodeOffset;
 
+    using INodeLoader::loadNode;
+    CJHTreeNode *loadNode(CJHTreeNode * ret, char *nodeData, offset_t pos, bool needsCopy);
     CJHTreeNode *loadNode(char *nodeData, offset_t pos, bool needsCopy);
     CJHTreeNode *getNode(offset_t offset, NodeType type, IContextLogger *ctx);
     CJHTreeBlobNode *getBlobNode(offset_t nodepos);
@@ -141,7 +146,8 @@ public:
     virtual bool prewarmPage(offset_t page, NodeType type);
  
  // INodeLoader impl.
-    virtual CJHTreeNode *loadNode(offset_t offset) = 0;
+    virtual CJHTreeNode * createNode(NodeType type) final;
+    virtual CJHTreeNode * loadNode(CJHTreeNode * optNode, offset_t offset) = 0;
     CJHTreeNode *locateFirstNode(KeyStatsCollector &stats);
     CJHTreeNode *locateLastNode(KeyStatsCollector &stats);
 };
@@ -156,7 +162,7 @@ public:
     virtual const char *queryFileName() { return name.get(); }
     virtual const IFileIO *queryFileIO() const override { return nullptr; }
 // INodeLoader impl.
-    virtual CJHTreeNode *loadNode(offset_t offset);
+    virtual CJHTreeNode *loadNode(CJHTreeNode * optNode, offset_t offset);
 };
 
 class jhtree_decl CDiskKeyIndex : public CKeyIndex
@@ -171,7 +177,7 @@ public:
     virtual const char *queryFileName() { return name.get(); }
     virtual const IFileIO *queryFileIO() const override { return io; }
 // INodeLoader impl.
-    virtual CJHTreeNode *loadNode(offset_t offset);
+    virtual CJHTreeNode *loadNode(CJHTreeNode * optNode, offset_t offset);
 };
 
 class jhtree_decl CKeyCursor : public CInterfaceOf<IKeyCursor>

+ 2 - 0
thorlcr/graph/thgraph.cpp

@@ -2811,12 +2811,14 @@ void CJobBase::startJob()
     unsigned keyNodeCacheMB = getWorkUnitValueInt("keyNodeCacheMB", DEFAULT_KEYNODECACHEMB * queryJobChannels());
     unsigned keyLeafCacheMB = getWorkUnitValueInt("keyLeafCacheMB", DEFAULT_KEYLEAFCACHEMB * queryJobChannels());
     unsigned keyBlobCacheMB = getWorkUnitValueInt("keyBlobCacheMB", DEFAULT_KEYBLOBCACHEMB * queryJobChannels());
+    bool legacyNodeCache = getWorkUnitValueBool("legacyNodeCache", false);
     keyNodeCacheBytes = ((memsize_t)0x100000) * keyNodeCacheMB;
     keyLeafCacheBytes = ((memsize_t)0x100000) * keyLeafCacheMB;
     keyBlobCacheBytes = ((memsize_t)0x100000) * keyBlobCacheMB;
     setNodeCacheMem(keyNodeCacheBytes);
     setLeafCacheMem(keyLeafCacheBytes);
     setBlobCacheMem(keyBlobCacheBytes);
+    setLegacyNodeCache(legacyNodeCache);
     PROGLOG("Key node caching setting: node=%u MB, leaf=%u MB, blob=%u MB", keyNodeCacheMB, keyLeafCacheMB, keyBlobCacheMB);
 
     unsigned keyFileCacheLimit = (unsigned)getWorkUnitValueInt("keyFileCacheLimit", 0);