Przeglądaj źródła

Merge pull request #15308 from jakesmith/hpcc-26443-thor-helm-memory-settings

HPCC-26443 Allow query vs other (third party) memory requirements

Reviewed-By: Gavin Halliday <gavin.halliday@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 3 lat temu
rodzic
commit
c1052c164e

+ 20 - 0
helm/hpcc/values.schema.json

@@ -480,6 +480,20 @@
     "resources": {
       "type": "object"
     },
+    "memory": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "The amount of overall resourced memory to dedicate to the query"
+        },
+        "thirdParty": {
+          "type": "string",
+          "description": "The amount of overall resourced memory to reserve for third-party use"
+        }
+      },
+      "additionalProperties": false
+    },
     "secrets": {
       "oneOf": [
         {
@@ -1206,6 +1220,12 @@
           "type": "object",
           "additionalProperties": { "type": "string" }
         },
+        "workerMemory": {
+          "$ref": "#/definitions/memory"
+        },
+        "managerMemory": {
+          "$ref": "#/definitions/memory"
+        },
         "managerResources": {
           "$ref": "#/definitions/resources"
         },

+ 3 - 0
helm/hpcc/values.yaml

@@ -532,6 +532,9 @@ thor:
   #workerResources:
   #  cpu: "4"
   #  memory: "4G"
+  #workerMemory:
+  #  query: "3G"
+  #  thirdParty: "500M"
   #eclAgentResources:
   #  cpu: "1"
   #  memory: "2G"

+ 59 - 4
thorlcr/graph/thgraph.cpp

@@ -2714,7 +2714,7 @@ CJobBase::CJobBase(ILoadedDllEntry *_querySo, const char *_graphName) : querySo(
     maxDiskUsage = diskUsage = 0;
     dirty = true;
     aborted = false;
-    globalMemoryMB = globals->getPropInt("@globalMemorySize"); // in MB
+    queryMemoryMB = 0;
     channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
     numChannels = channelsPerSlave;
     pluginMap = new SafePluginMap(&pluginCtx, true);
@@ -2740,6 +2740,64 @@ CJobBase::CJobBase(ILoadedDllEntry *_querySo, const char *_graphName) : querySo(
         throwUnexpected();
 }
 
+void CJobBase::applyMemorySettings(float recommendedMaxPercentage, const char *context)
+{
+    // NB: 'total' memory has been calculated in advance from either resource settings or from system memory.
+    VStringBuffer totalMemorySetting("%sMemory/@total", context);
+    unsigned totalMemoryMB = globals->getPropInt(totalMemorySetting);
+
+    unsigned recommendedMaxMemoryMB = totalMemoryMB * recommendedMaxPercentage / 100;
+#ifdef _CONTAINERIZED
+    /* only "query" memory is actually used (if set, configures Thor roxiemem limit)
+     * others are only advisory, but totalled and checked to ensure within the total limit.
+     */
+    std::initializer_list<const char *> memorySettings = { "query", "thirdParty" };
+    offset_t totalRequirements = 0;
+    for (auto setting : memorySettings)
+    {
+        VStringBuffer workunitSettingName("%smemory.%s", context, setting); // NB: workunit options are case insensitive
+        StringBuffer memString;
+        getWorkUnitValue(workunitSettingName, memString);
+        if (0 == memString.length())
+        {
+            VStringBuffer globalSettingName("%sMemory/@%s", context, setting);
+            globals->getProp(globalSettingName, memString);
+        }
+        if (memString.length())
+        {
+            offset_t memBytes = friendlyStringToSize(memString);
+            if (streq("query", setting))
+                queryMemoryMB = (unsigned)(memBytes / 0x100000);
+            totalRequirements += memBytes;
+        }
+    }
+    unsigned totalRequirementsMB = (unsigned)(totalRequirements / 0x100000);
+    if (totalRequirementsMB > totalMemoryMB)
+        throw makeStringExceptionV(0, "The total memory requirements of the query (%u MB) exceeds the %s memory limit (%u MB)", totalRequirementsMB, context, totalMemoryMB);
+
+    if (totalRequirementsMB > recommendedMaxMemoryMB)
+    {
+        WARNLOG("The total memory requirements of the query (%u MB) exceed the recommended reserve limits for %s (total memory: %u MB, recommended max percentage : %.2f%%)", totalRequirementsMB, context, totalMemoryMB, recommendedMaxPercentage);
+    
+        // if "query" memory has not been defined, then use the remaining memory
+        if (0 == queryMemoryMB)
+            queryMemoryMB = totalMemoryMB - totalRequirementsMB;
+    }
+    else if (0 == queryMemoryMB)
+        queryMemoryMB = recommendedMaxMemoryMB - totalRequirementsMB;
+#else
+    queryMemoryMB = recommendedMaxMemoryMB;
+#endif
+
+    bool gmemAllowHugePages = globals->getPropBool("@heapUseHugePages", false);
+    gmemAllowHugePages = globals->getPropBool("@heapMasterUseHugePages", gmemAllowHugePages);
+    bool gmemAllowTransparentHugePages = globals->getPropBool("@heapUseTransparentHugePages", true);
+    bool gmemRetainMemory = globals->getPropBool("@heapRetainMemory", false);
+    roxiemem::setTotalMemoryLimit(gmemAllowHugePages, gmemAllowTransparentHugePages, gmemRetainMemory, ((memsize_t)queryMemoryMB) * 0x100000, 0, thorAllocSizes, NULL);
+
+    PROGLOG("Total memory = %u MB, query memory = %u MB, memory spill at = %u", totalMemoryMB, queryMemoryMB, memorySpillAtPercentage);
+}
+
 void CJobBase::init()
 {
     StringBuffer tmp;
@@ -2768,13 +2826,10 @@ void CJobBase::init()
     crcChecking = 0 != getWorkUnitValueInt("THOR_ROWCRC", globals->getPropBool("@THOR_ROWCRC", false));
     usePackedAllocator = 0 != getWorkUnitValueInt("THOR_PACKEDALLOCATOR", globals->getPropBool("@THOR_PACKEDALLOCATOR", true));
     memorySpillAtPercentage = (unsigned)getWorkUnitValueInt("memorySpillAt", globals->getPropInt("@memorySpillAt", 80));
-    sharedMemoryLimitPercentage = (unsigned)getWorkUnitValueInt("globalMemoryLimitPC", globals->getPropInt("@sharedMemoryLimit", 90));
-    sharedMemoryMB = globalMemoryMB*sharedMemoryLimitPercentage/100;
     failOnLeaks = getOptBool("failOnLeaks");
     maxLfnBlockTimeMins = getOptInt(THOROPT_MAXLFN_BLOCKTIME_MINS, DEFAULT_MAXLFN_BLOCKTIME_MINS);
     soapTraceLevel = getOptInt("soapTraceLevel", 1);
 
-    PROGLOG("Global memory size = %d MB, shared memory = %d%%, memory spill at = %d%%", globalMemoryMB, sharedMemoryLimitPercentage, memorySpillAtPercentage);
     StringBuffer tracing("maxActivityCores = ");
     if (maxActivityCores)
         tracing.append(maxActivityCores);

+ 26 - 2
thorlcr/graph/thgraph.hpp

@@ -69,6 +69,29 @@
 #define THORDATALINK_COUNT_MASK         (RCMAX>>2)                                  // mask to extract count value only
 
 
+/* These percentages are used to determine the amount of memory roxiemem
+ * allocates from the total system memory.
+ *
+ * For historical reasons the default in bare-metal has always been a
+ * conservative 75% of system memory, leaving 25% free for the heap/OS etc.
+ * In container mode a more aggressive default of 90% is used.
+ *
+ * In bare-metal, these percentages do not apply if
+ * 'globalMemorySize' and/or 'masterMemorySize' are configured.
+ * 
+ * In container mode, workerMemory and/or managerMemory can be used to override
+ * these default percentages. However, the default percentages will still be
+ * used to give a warning if the workerMemory or managerMemory totals exceed the
+ * defaults.
+ */
+
+#ifdef _CONTAINERIZED
+constexpr float defaultPctSysMemForRoxie = 90.0;
+#else
+constexpr float defaultPctSysMemForRoxie = 75.0;
+#endif
+
+
 
 enum ActivityAttributes { ActAttr_Source=1, ActAttr_Sink=2 };
 const static roxiemem::RoxieHeapFlags defaultHeapFlags = roxiemem::RHFscanning;
@@ -829,7 +852,7 @@ protected:
     bool timeActivities;
     unsigned channelsPerSlave;
     unsigned numChannels;
-    unsigned maxActivityCores, globalMemoryMB, sharedMemoryMB;
+    unsigned maxActivityCores, queryMemoryMB, sharedMemoryMB;
     unsigned forceLogGraphIdMin, forceLogGraphIdMax;
     Owned<IContextLogger> logctx;
     Owned<IPerfMonHook> perfmonhook;
@@ -840,7 +863,7 @@ protected:
     bool usePackedAllocator;
     rank_t myNodeRank;
     Owned<IPropertyTree> graphXGMML;
-    unsigned memorySpillAtPercentage, sharedMemoryLimitPercentage;
+    unsigned memorySpillAtPercentage;
     CriticalSection sharedAllocatorCrit;
     Owned<IThorAllocator> sharedAllocator;
     bool jobEnded = false;
@@ -899,6 +922,7 @@ public:
     virtual IGraphTempHandler *createTempHandler(bool errorOnMissing) = 0;
     void addDependencies(IPropertyTree *xgmml, bool failIfMissing=true);
     void addSubGraph(IPropertyTree &xgmml);
+    void applyMemorySettings(float recommendReservePercentage, const char *context);
 
     void checkAndReportLeaks(roxiemem::IRowManager *rowManager);
     bool queryUseCheckpoints() const;

+ 9 - 2
thorlcr/graph/thgraphmaster.cpp

@@ -1301,7 +1301,6 @@ CJobMaster::CJobMaster(IConstWorkUnit &_workunit, const char *graphName, ILoaded
     user.set(workunit->queryUser());
     token.append(_token.str());
     scope.append(_scope.str());
-    globalMemoryMB = globals->getPropInt("@masterMemorySize", globals->getPropInt("@globalMemorySize")); // in MB
     numChannels = 1;
     init();
 
@@ -1347,7 +1346,15 @@ CJobMaster::CJobMaster(IConstWorkUnit &_workunit, const char *graphName, ILoaded
         plugin.getPluginName(name);
         loadPlugin(pluginMap, pluginsDir.str(), name.str());
     }
-    sharedAllocator.setown(::createThorAllocator(globalMemoryMB, 0, 1, memorySpillAtPercentage, *logctx, crcChecking, usePackedAllocator));
+
+    float recommendedMaxPercentage = defaultPctSysMemForRoxie;
+#ifndef _CONTAINERIZED
+    // @localThor mode - 25% is used for manager and 50% is used for workers
+    if (globals->getPropBool("@localThor") && (0 == globals->getPropInt("@masterMemorySize")))
+        recommendedMaxPercentage = 25.0;
+#endif
+    applyMemorySettings(recommendedMaxPercentage, "manager");
+    sharedAllocator.setown(::createThorAllocator(queryMemoryMB, 0, 1, memorySpillAtPercentage, *logctx, crcChecking, usePackedAllocator));
     Owned<IMPServer> mpServer = getMPServer();
     CJobChannel *channel = addChannel(mpServer);
     channel->reservePortKind(TPORT_mp); 

+ 41 - 1
thorlcr/graph/thgraphslave.cpp

@@ -1686,7 +1686,47 @@ CJobSlave::CJobSlave(ISlaveWatchdog *_watchdog, IPropertyTree *_workUnitInfo, co
         pluginMap->loadFromList(pluginsList.str());
     }
     tmpHandler.setown(createTempHandler(true));
-    sharedAllocator.setown(::createThorAllocator(globalMemoryMB, sharedMemoryMB, numChannels, memorySpillAtPercentage, *logctx, crcChecking, usePackedAllocator));
+
+    /*
+     * Calculate maximum recommended memory for this worker.
+     * In container mode, there is only ever 1 worker per container,
+     * recommendedMaxPercentage = defaultPctSysMemForRoxie
+     * In bare-metal slavesPerNode is taken into account and @localThor if used.
+     * 
+     * recommendedMaxPercentage is used by applyMemorySettings to calculate the
+     * max amount of memory that should be used (allowing enough left for heap/OS etc.)
+     */
+#ifdef _CONTAINERIZED
+    float recommendedMaxPercentage = defaultPctSysMemForRoxie;
+#else
+    // bare-metal only
+
+    float recommendedMaxPercentage = defaultPctSysMemForRoxie;
+    unsigned numWorkersPerNode = globals->getPropInt("@slavesPerNode", 1);
+
+    // @localThor mode - 25% is used for manager and 50% is used for workers
+    if (globals->getPropBool("@localThor") && (0 == globals->getPropInt("@masterMemorySize")))
+    {
+        /* In this mode, 25% is reserved for the manager and
+         * 50% for the workers, so this worker's recommendedMaxPercentage
+         * is an equal share of that 50% (50 / numWorkersPerNode). */
+        float pctPerWorker = 50.0 / numWorkersPerNode;
+        recommendedMaxPercentage = pctPerWorker;
+    }
+    else
+    {
+        // share the default max percentage equally between the workers on this node
+        float pctPerWorker = defaultPctSysMemForRoxie / numWorkersPerNode;
+        recommendedMaxPercentage = pctPerWorker;
+    }
+#endif
+    applyMemorySettings(recommendedMaxPercentage, "worker");
+
+    unsigned sharedMemoryLimitPercentage = (unsigned)getWorkUnitValueInt("globalMemoryLimitPC", globals->getPropInt("@sharedMemoryLimit", 90));
+    unsigned sharedMemoryMB = queryMemoryMB*sharedMemoryLimitPercentage/100;
+    PROGLOG("Shared memory = %d%%", sharedMemoryLimitPercentage);
+
+    sharedAllocator.setown(::createThorAllocator(queryMemoryMB, sharedMemoryMB, numChannels, memorySpillAtPercentage, *logctx, crcChecking, usePackedAllocator));
 
     StringBuffer remoteCompressedOutput;
     getOpt("remoteCompressedOutput", remoteCompressedOutput);

+ 18 - 60
thorlcr/master/thmastermain.cpp

@@ -74,19 +74,6 @@
 #define SHUTDOWN_IN_PARALLEL 20
 
 
-/* These percentages are used to determine the amount roxiemem allocated
- * from total system memory.
- *
- * For historical reasons the default in bare-metal has always been a
- * conservative 75%.
- *
- * NB: These percentages do not apply if the memory amount has been configured
- * manually via 'globalMemorySize' and 'masterMemorySize'
- */
-
-static constexpr unsigned bareMetalRoxieMemPC = 75;
-static constexpr unsigned containerRoxieMemPC = 90;
-
 
 class CThorEndHandler : implements IThreaded
 {
@@ -744,7 +731,8 @@ int main( int argc, const char *argv[]  )
             Owned<IPropertyTree> masterNasFilters = envGetInstallNASHooks(nasConfig, &thorEp);
         }
 #endif
-        
+
+
         HardwareInfo hdwInfo;
         getHardwareInfo(hdwInfo);
         globals->setPropInt("@masterTotalMem", hdwInfo.totalMemory);
@@ -758,62 +746,41 @@ int main( int argc, const char *argv[]  )
             {
                 offset_t sizeBytes = friendlyStringToSize(workerResourcedMemory);
                 gmemSize = (unsigned)(sizeBytes / 0x100000);
-                gmemSize = gmemSize * containerRoxieMemPC / 100;
             }
             else
             {
-                unsigned maxMem = hdwInfo.totalMemory;
+                gmemSize = hdwInfo.totalMemory;
 #ifdef _WIN32
-                if (maxMem > 2048)
-                    maxMem = 2048;
+                if (gmemSize > 2048)
+                    gmemSize = 2048;
 #else
 #ifndef __64BIT__
-                if (maxMem > 2048)
+                if (gmemSize > 2048)
                 {
                     // 32 bit OS doesn't handle whole physically installed RAM
-                    maxMem = 2048;
+                    gmemSize = 2048;
                 }
 #ifdef __ARM_ARCH_7A__
                 // For ChromeBook with 2GB RAM
-                if (maxMem <= 2048)
+                if (gmemSize <= 2048)
                 {
                     // Decrease max memory to 2/3 
-                    maxMem = maxMem * 2 / 3; 
+                    gmemSize = gmemSize * 2 / 3; 
                 }
 #endif            
 #endif
 #endif
-                if (isContainerized())
-                    gmemSize = maxMem * containerRoxieMemPC / 100; // NB: MB's
-                else
-                {
-                    if (globals->getPropBool("@localThor") && 0 == mmemSize)
-                    {
-                        gmemSize = maxMem / 2; // 50% of total for slaves
-                        mmemSize = maxMem / 4; // 25% of total for master
-                    }
-                    else
-                        gmemSize = maxMem * bareMetalRoxieMemPC / 100; // NB: MB's
-                }
             }
-            unsigned perSlaveSize = gmemSize;
-#ifndef _CONTAINERIZED
-            if (slavesPerNode>1)
-            {
-                PROGLOG("Sharing globalMemorySize(%d MB), between %d slave processes. %d MB each", perSlaveSize, slavesPerNode, perSlaveSize / slavesPerNode);
-                perSlaveSize /= slavesPerNode;
-            }
-#endif
-            globals->setPropInt("@globalMemorySize", perSlaveSize);
         }
-        else
+        IPropertyTree *workerMemory = ensurePTree(globals, "workerMemory");
+        workerMemory->setPropInt("@total", gmemSize);
+
+        if (mmemSize)
         {
-            if (gmemSize >= hdwInfo.totalMemory)
-            {
-                // should prob. error here
-            }
+            if (mmemSize > hdwInfo.totalMemory)
+                OWARNLOG("Configured manager memory size (%u MB) is greater than total hardware memory (%u MB)", mmemSize, hdwInfo.totalMemory);
         }
-        if (0 == mmemSize)
+        else
         {
             // NB: This could be in a isContainerized(), but the 'managerResources' section only applies to containerized setups
             const char *managerResourcedMemory = globals->queryProp("managerResources/@memory");
@@ -821,22 +788,13 @@ int main( int argc, const char *argv[]  )
             {
                 offset_t sizeBytes = friendlyStringToSize(managerResourcedMemory);
                 mmemSize = (unsigned)(sizeBytes / 0x100000);
-                mmemSize = mmemSize * containerRoxieMemPC / 100;
             }
             else
                 mmemSize = gmemSize; // default to same as slaves
         }
 
-        bool gmemAllowHugePages = globals->getPropBool("@heapUseHugePages", false);
-        gmemAllowHugePages = globals->getPropBool("@heapMasterUseHugePages", gmemAllowHugePages);
-        bool gmemAllowTransparentHugePages = globals->getPropBool("@heapUseTransparentHugePages", true);
-        bool gmemRetainMemory = globals->getPropBool("@heapRetainMemory", false);
-
-        // if @masterMemorySize and @globalMemorySize unspecified gmemSize will be default based on h/w
-        globals->setPropInt("@masterMemorySize", mmemSize);
-
-        PROGLOG("Global memory size = %d MB", mmemSize);
-        roxiemem::setTotalMemoryLimit(gmemAllowHugePages, gmemAllowTransparentHugePages, gmemRetainMemory, ((memsize_t)mmemSize) * 0x100000, 0, thorAllocSizes, NULL);
+        IPropertyTree *managerMemory = ensurePTree(globals, "managerMemory");
+        managerMemory->setPropInt("@total", mmemSize);
 
         char thorPath[1024];
         if (!GetCurrentDirectory(1024, thorPath))

+ 0 - 10
thorlcr/slave/slavmain.cpp

@@ -2356,16 +2356,6 @@ void slaveMain(bool &jobListenerStopped, ILogMsgHandler *logHandler)
     getHardwareInfo(hdwInfo);
     if (hdwInfo.totalMemory < masterMemMB)
         OWARNLOG("Slave has less memory than master node");
-    unsigned gmemSize = globals->getPropInt("@globalMemorySize");
-    bool gmemAllowHugePages = globals->getPropBool("@heapUseHugePages", false);
-    bool gmemAllowTransparentHugePages = globals->getPropBool("@heapUseTransparentHugePages", true);
-    bool gmemRetainMemory = globals->getPropBool("@heapRetainMemory", false);
-
-    if (gmemSize >= hdwInfo.totalMemory)
-    {
-        // should prob. error here
-    }
-    roxiemem::setTotalMemoryLimit(gmemAllowHugePages, gmemAllowTransparentHugePages, gmemRetainMemory, ((memsize_t)gmemSize) * 0x100000, 0, thorAllocSizes, NULL);
 
     CThorResourceSlave slaveResource;
     CJobListener jobListener(jobListenerStopped);