
Merge branch 'candidate-8.2.x'

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 3 years ago
commit 0fc8d23b94
83 changed files with 1683 additions and 725 deletions
  1. cmake_modules/go_gold.sh (+2 -2)
  2. cmake_modules/parse_cmake.sh (+5 -4)
  3. common/remote/hooks/azure/azurefile.cpp (+0 -2)
  4. common/remote/hooks/s3/s3file.cpp (+0 -1)
  5. common/workunit/workunit.cpp (+71 -39)
  6. common/workunit/workunit.hpp (+3 -0)
  7. configuration/cli/envmod/envmod.cpp (+2 -1)
  8. dali/sasha/sacmd.cpp (+5 -8)
  9. dockerfiles/README.md (+2 -2)
  10. dockerfiles/startall.sh (+44 -16)
  11. dockerfiles/stopall.sh (+11 -2)
  12. ecl/eclcc/eclcc.cpp (+27 -7)
  13. ecl/eclccserver/eclccserver.cpp (+3 -3)
  14. ecllibrary/std/File.ecl (+15 -0)
  15. elastic4hpcclogs-0.1.0.tgz (BIN)
  16. esp/bindings/http/platform/httpprot.cpp (+1 -1)
  17. esp/bindings/http/platform/httptransport.cpp (+1 -1)
  18. esp/bindings/http/platform/httptransport.hpp (+1 -0)
  19. esp/scm/ws_workunits.ecm (+1 -1)
  20. esp/scm/ws_workunits_queryset_req_resp.ecm (+1 -0)
  21. esp/scm/ws_workunits_struct.ecm (+1 -0)
  22. esp/services/ws_workunits/ws_workunitsQuerySets.cpp (+17 -7)
  23. esp/src/eclwatch/LZBrowseWidget.js (+1 -1)
  24. esp/src/eclwatch/QuerySetQueryWidget.js (+10 -1)
  25. esp/src/eclwatch/TargetSelectClass.js (+2 -2)
  26. esp/src/eclwatch/TimingPageWidget.js (+5 -6)
  27. esp/src/eclwatch/templates/QuerySetQueryWidget.html (+6 -0)
  28. esp/test/httptest/httptest.cpp (+1 -2)
  29. esp/test/httptest/httptest.hpp (+4 -4)
  30. helm/examples/azure/hpcc-azurefile/templates/NOTES.txt (+3 -23)
  31. helm/examples/efs/hpcc-efs/templates/NOTES.txt (+3 -23)
  32. helm/examples/filestore/hpcc-filestore/templates/NOTES.txt (+3 -23)
  33. helm/examples/local/hpcc-localfile/templates/NOTES.txt (+2 -19)
  34. helm/examples/metrics/README.md (+22 -0)
  35. helm/examples/metrics/filesink.yaml (+19 -0)
  36. helm/examples/metrics/loggingsink.yaml (+13 -0)
  37. helm/examples/metrics/prometheus_metrics.yaml (+17 -0)
  38. helm/examples/nfs/hpcc-nfs/templates/NOTES.txt (+4 -28)
  39. helm/hpcc/docs/changes.md (+1 -1)
  40. helm/hpcc/docs/storage.md (+191 -0)
  41. helm/hpcc/templates/NOTES.txt (+4 -0)
  42. helm/hpcc/templates/_helpers.tpl (+15 -1)
  43. helm/hpcc/values.schema.json (+23 -0)
  44. helm/hpcc/values.yaml (+19 -21)
  45. helm/managed/logging/elastic/Chart.yaml (+7 -4)
  46. helm/managed/logging/elastic/README.md (+93 -3)
  47. helm/managed/logging/elastic/values.yaml (+39 -16)
  48. helm/storage.rst (+0 -92)
  49. initfiles/bash/etc/init.d/pid.sh (+1 -1)
  50. initfiles/componentfiles/configxml/roxie.xsd.in (+1 -1)
  51. initfiles/etc/DIR_NAME/environment.xml.in (+1 -1)
  52. plugins/fileservices/fileservices.cpp (+37 -0)
  53. plugins/fileservices/fileservices.hpp (+1 -0)
  54. plugins/parselib/parselib.cpp (+1 -0)
  55. plugins/unicodelib/unicodelib.cpp (+1 -0)
  56. roxie/CMakeLists.txt (+3 -0)
  57. roxie/ccd/CMakeLists.txt (+4 -4)
  58. roxie/ccd/ccdcontext.cpp (+21 -20)
  59. roxie/ccd/ccdfile.cpp (+100 -213)
  60. roxie/ccd/ccdfile.hpp (+1 -0)
  61. roxie/ccd/ccdlistener.cpp (+9 -6)
  62. roxie/ccd/ccdprotocol.cpp (+30 -7)
  63. roxie/ccd/ccdserver.cpp (+1 -1)
  64. roxie/ccd/hpccprotocol.hpp (+1 -1)
  65. roxie/ccdcache/CMakeLists.txt (+73 -0)
  66. roxie/ccdcache/ccdcache.cpp (+323 -0)
  67. roxie/ccdcache/ccdcache.hpp (+83 -0)
  68. roxie/roxiemem/roxiemem.cpp (+32 -70)
  69. roxie/topo/toposerver.cpp (+3 -2)
  70. system/jlib/jhash.hpp (+87 -0)
  71. system/jlib/jlib.hpp (+0 -2)
  72. system/jlib/jlog.hpp (+1 -0)
  73. testing/regress/ecl/dropzone_query_test.ecl (+2 -0)
  74. testing/regress/ecl/memcachedtest.ecl (+12 -5)
  75. testing/regress/environment.xml.in (+1 -1)
  76. testing/unittests/jlibtests.cpp (+34 -0)
  77. thorlcr/activities/diskread/thdiskreadslave.cpp (+2 -1)
  78. thorlcr/activities/indexread/thindexreadslave.cpp (+1 -1)
  79. thorlcr/activities/thdiskbase.cpp (+38 -9)
  80. thorlcr/activities/thdiskbase.ipp (+5 -2)
  81. thorlcr/activities/thdiskbaseslave.cpp (+29 -5)
  82. thorlcr/activities/thdiskbaseslave.ipp (+2 -1)
  83. thorlcr/master/thmastermain.cpp (+22 -4)

+ 2 - 2
cmake_modules/go_gold.sh

@@ -67,7 +67,7 @@ set_tag
 
 # Commit the change
 doit "git add $VERSIONFILE"
-doit "git commit -s -m \"$HPCC_NAME $HPCC_SHORT_TAG Gold\""
+doit "git commit -s -m \"$HPCC_NAME $HPCC_SHORT_TAG-$HPCC_SEQUENCE Gold\""
 doit "git push $REMOTE $GIT_BRANCH $FORCE"
 
 # tag it
@@ -99,7 +99,7 @@ if [ -e helm/hpcc/Chart.yaml ] ; then
   doit "helm repo index . --url https://hpcc-systems.github.io/helm-chart"
   doit "git add *.tgz"
   
-  doit "git commit -a -s -m \"$HPCC_NAME Helm Charts $HPCC_SHORT_TAG\""
+  doit "git commit -a -s -m \"$HPCC_NAME Helm Charts $HPCC_SHORT_TAG-$HPCC_SEQUENCE\""
   if [[ "$HPCC_MAJOR" == "8" ]] && [[ "$HPCC_MINOR" == "2" ]] ; then
     doit "git tag $FORCE $HPCC_MAJOR.$HPCC_MINOR.$HPCC_POINT && git push $REMOTE $HPCC_MAJOR.$HPCC_MINOR.$HPCC_POINT $FORCE"
   fi

+ 5 - 4
cmake_modules/parse_cmake.sh

@@ -119,12 +119,13 @@ function doit2()
 function set_tag()
 {
     local _prefix=$1
-    local _maturity=$HPCC_MATURITY
     if [ "$HPCC_MATURITY" = "release" ]; then
-      _maturity=
+      HPCC_SHORT_TAG=$HPCC_MAJOR.$HPCC_MINOR.$HPCC_POINT
+      HPCC_LONG_TAG=${_prefix}_$HPCC_SHORT_TAG-$HPCC_SEQUENCE
+    else
+      HPCC_SHORT_TAG=$HPCC_MAJOR.$HPCC_MINOR.$HPCC_POINT-$HPCC_MATURITY$HPCC_SEQUENCE
+      HPCC_LONG_TAG=${_prefix}_$HPCC_SHORT_TAG
     fi
-    HPCC_SHORT_TAG=$HPCC_MAJOR.$HPCC_MINOR.$HPCC_POINT-$_maturity$HPCC_SEQUENCE
-    HPCC_LONG_TAG=${_prefix}_$HPCC_SHORT_TAG
 }
 
 function update_version_file()
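
The reworked `set_tag` means release builds now carry the sequence number only in the long tag, while pre-release builds keep the maturity+sequence suffix in both tags. A sketch of the resulting values, using a hypothetical `community` prefix and version 8.2.0:

```bash
# Hypothetical examples of the new tagging scheme (prefix "community" assumed):
# release     (HPCC_MATURITY=release, HPCC_SEQUENCE=1):
#   HPCC_SHORT_TAG=8.2.0       HPCC_LONG_TAG=community_8.2.0-1
# pre-release (HPCC_MATURITY=rc, HPCC_SEQUENCE=2):
#   HPCC_SHORT_TAG=8.2.0-rc2   HPCC_LONG_TAG=community_8.2.0-rc2
```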

+ 0 - 2
common/remote/hooks/azure/azurefile.cpp

@@ -210,7 +210,6 @@ public:
     virtual IDirectoryIterator *directoryFiles(const char *mask, bool sub, bool includeDirs) override
     {
         UNIMPLEMENTED_X("AzureFile::directoryFiles");
-        return createNullDirectoryIterator();
     }
     virtual bool getInfo(bool &isdir,offset_t &size,CDateTime &modtime) override
     {
@@ -431,7 +430,6 @@ void AzureFileAppendBlobWriteIO::close()
 offset_t AzureFileAppendBlobWriteIO::appendFile(IFile *file, offset_t pos, offset_t len)
 {
     UNIMPLEMENTED_X("AzureFileAppendBlobWriteIO::appendFile");
-    return 0;
 }
 
 offset_t AzureFileAppendBlobWriteIO::size()

+ 0 - 1
common/remote/hooks/s3/s3file.cpp

@@ -225,7 +225,6 @@ public:
     virtual IDirectoryIterator *directoryFiles(const char *mask, bool sub, bool includeDirs) override
     {
         UNIMPLEMENTED;
-        return createNullDirectoryIterator();
     }
     virtual bool getInfo(bool &isdir,offset_t &size,CDateTime &modtime) override
     {

+ 71 - 39
common/workunit/workunit.cpp

@@ -14137,23 +14137,31 @@ KeepK8sJobs translateKeepJobs(const char *keepJob)
     return KeepK8sJobs::none;
 }
 
+// NB: throws an exception if the command fails (returns a non-zero exit code)
+static void runKubectlCommand(const char *title, const char *cmd, const char *input, StringBuffer *output)
+{
+    StringBuffer _output, error;
+    if (!output)
+        output = &_output;
+    unsigned ret = runExternalCommand(title, *output, error, cmd, input);
+    if (output->length())
+        MLOG(MCExtraneousInfo, unknownJob, "%s: ret=%u, stdout=%s", cmd, ret, output->trimRight().str());
+    if (error.length())
+        MLOG(MCinternalError, unknownJob, "%s: ret=%u, stderr=%s", cmd, ret, error.trimRight().str());
+    if (ret)
+    {
+        if (input)
+            MLOG(MCinternalError, unknownJob, "Using input %s", input);
+        throw makeStringExceptionV(0, "Failed to run %s: error %u: %s", cmd, ret, error.str());
+    }
+}
+
 void deleteK8sResource(const char *componentName, const char *job, const char *resource)
 {
     VStringBuffer jobname("%s-%s", componentName, job);
     jobname.toLowerCase();
     VStringBuffer deleteResource("kubectl delete %s/%s", resource, jobname.str());
-    StringBuffer output, error;
-    bool ret = runExternalCommand(componentName, output, error, deleteResource.str(), nullptr);
-    DBGLOG("kubectl delete output: %s", output.trimRight().str());
-    if (error.length())
-        DBGLOG("kubectl delete error: %s", error.trimRight().str());
-    if (ret)
-    {
-        StringBuffer errorText("Failed to run kubectl delete");
-        if (error.length())
-            errorText.append(", error: ").append(error);
-        throw makeStringException(0, errorText);
-    }
+    runKubectlCommand(componentName, deleteResource, nullptr, nullptr);
 }
 
 void waitK8sJob(const char *componentName, const char *job, unsigned pendingTimeoutSecs, KeepK8sJobs keepJob)
@@ -14173,27 +14181,19 @@ void waitK8sJob(const char *componentName, const char *job, unsigned pendingTime
     {
         for (;;)
         {
-            StringBuffer output, error;
-            unsigned ret = runExternalCommand(nullptr, output, error, waitJob.str(), nullptr);
-            if (ret || error.length())
-                throw makeStringExceptionV(0, "Failed to run %s: error %u: %s", waitJob.str(), ret, error.str());
+            StringBuffer output;
+            runKubectlCommand(componentName, waitJob, nullptr, &output);
             if (!streq(output, "1"))  // status.active value
             {
                 // Job is no longer active - we can terminate
                 DBGLOG("kubectl jobs output: %s", output.str());
-                unsigned ret = runExternalCommand(nullptr, output.clear(), error.clear(), checkJobExitCode.str(), nullptr);
-                if (ret || error.length())
-                    throw makeStringExceptionV(0, "Failed to run %s: error %u: %s", checkJobExitCode.str(), ret, error.str());
+                runKubectlCommand(componentName, checkJobExitCode, nullptr, &output.clear());
                 if (output.length() && !streq(output, "0"))  // state.terminated.exitCode
                     throw makeStringExceptionV(0, "Failed to run %s: pod exited with error: %s", jobname.str(), output.str());
                 break;
             }
-            ret = runExternalCommand(nullptr, output.clear(), error.clear(), getScheduleStatus.str(), nullptr);
-            if (error.length())
-            {
-                DBGLOG("kubectl get schedule status error: %s", error.str());
-                break;
-            }
+            runKubectlCommand(nullptr, getScheduleStatus, nullptr, &output.clear());
+
             // Check whether pod has been scheduled yet - if resources are not available pods may block indefinitely waiting to be scheduled, and
             // we would prefer them to fail instead.
             bool pending = streq(output, "False");
@@ -14201,7 +14201,7 @@ void waitK8sJob(const char *componentName, const char *job, unsigned pendingTime
             {
                 schedulingTimeout = true;
                 VStringBuffer getReason("kubectl get pods --selector=job-name=%s \"--output=jsonpath={range .items[*].status.conditions[?(@.type=='PodScheduled')]}{.reason}{': '}{.message}{end}\"", jobname.str());
-                runExternalCommand(componentName, output.clear(), error.clear(), getReason.str(), nullptr);
+                runKubectlCommand(componentName, getReason, nullptr, &output.clear());
                 throw makeStringExceptionV(0, "Failed to run %s - pod not scheduled after %u seconds: %s ", jobname.str(), pendingTimeoutSecs, output.str());
             }
             MilliSleep(delay);
@@ -14271,19 +14271,7 @@ bool applyK8sYaml(const char *componentName, const char *wuid, const char *job,
     }
 #endif
 
-    StringBuffer output, error;
-    unsigned ret = runExternalCommand(componentName, output, error, "kubectl replace --force -f -", jobYaml.str());
-    DBGLOG("kubectl output: %s", output.trimRight().str());
-    if (error.length())
-        DBGLOG("kubectl error: %s", error.trimRight().str());
-    if (ret)
-    {
-        DBGLOG("Using yaml %s", jobYaml.str());
-        StringBuffer errorText("Failed to replace k8s resource");
-        if (error.length())
-            errorText.append(", error: ").append(error);
-        throw makeStringException(0, errorText);
-    }
+    runKubectlCommand(componentName, "kubectl replace --force -f -", jobYaml, nullptr);
     return true;
 }
 
@@ -14312,4 +14300,48 @@ void runK8sJob(const char *componentName, const char *wuid, const char *job, con
         throw exception.getClear();
 }
 
+
+std::pair<std::string, unsigned> getExternalService(const char *serviceName)
+{
+    static CTimeLimitedCache<std::string, std::pair<std::string, unsigned>> externalServiceCache;
+    static CriticalSection externalServiceCacheCrit;
+
+    {
+        CriticalBlock b(externalServiceCacheCrit);
+        std::pair<std::string, unsigned> cachedExternalService;
+        if (externalServiceCache.get(serviceName, cachedExternalService))
+            return cachedExternalService;
+    }
+
+    StringBuffer output;
+    try
+    {
+        VStringBuffer getServiceCmd("kubectl get svc --selector=server=%s --output=jsonpath={.items[0].status.loadBalancer.ingress[0].hostname},{.items[0].spec.ports[0].port}", serviceName);
+        runKubectlCommand("get-external-service", getServiceCmd, nullptr, &output);
+    }
+    catch (IException *e)
+    {
+        EXCLOG(e);
+        VStringBuffer exceptionText("Failed to get external service for '%s'. Error: [%d, ", serviceName, e->errorCode());
+        e->errorMessage(exceptionText).append("]");
+        e->Release();
+        throw makeStringException(-1, exceptionText);
+    }
+    StringArray fields;
+    fields.appendList(output, ",");
+
+    // NB: add even if no result, want non-result to be cached too
+    std::string host;
+    unsigned port = 0;
+    if (fields.ordinality())
+    {
+        host = fields.item(0);
+        if (fields.ordinality()>1)
+            port = atoi(fields.item(1));
+    }
+    auto servicePair = std::make_pair(host, port);
+    externalServiceCache.add(serviceName, servicePair);
+    return servicePair;
+}
+
 #endif
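
The new `getExternalService` helper wraps a kubectl jsonpath query (with a time-limited cache for repeated lookups). The same lookup can be run by hand; a sketch, with `eclwatch` as a hypothetical service name:

```bash
# Manual equivalent of the lookup getExternalService() performs;
# "eclwatch" is a hypothetical service name, output is "<hostname>,<port>".
kubectl get svc --selector=server=eclwatch \
  "--output=jsonpath={.items[0].status.loadBalancer.ingress[0].hostname},{.items[0].spec.ports[0].port}"
```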

+ 3 - 0
common/workunit/workunit.hpp

@@ -1762,6 +1762,9 @@ extern WORKUNIT_API void deleteK8sResource(const char *componentName, const char
 extern WORKUNIT_API void waitK8sJob(const char *componentName, const char *job, unsigned pendingTimeoutSecs, KeepK8sJobs keepJob);
 extern WORKUNIT_API bool applyK8sYaml(const char *componentName, const char *wuid, const char *job, const char *suffix, const std::list<std::pair<std::string, std::string>> &extraParams, bool optional);
 extern WORKUNIT_API void runK8sJob(const char *componentName, const char *wuid, const char *job, const std::list<std::pair<std::string, std::string>> &extraParams={});
+
+// return the k8s external host and port for serviceName
+extern WORKUNIT_API std::pair<std::string, unsigned> getExternalService(const char *serviceName);
 #endif
 
 #endif

+ 2 - 1
configuration/cli/envmod/envmod.cpp

@@ -83,7 +83,7 @@ void usage()
     //
     // usage below documents options
     std::cout << std::endl;
-    std::cout << "envmod <options> envfile" << std::endl;
+    std::cout << "envmod <options>" << std::endl;
     std::cout << std::endl;
     std::cout << "  Configuration Options:" << std::endl;
     std::cout << "    -d --schema-dir <path>            : path to schema files. default (" << configSchemaDir << ")" << std::endl;
@@ -112,6 +112,7 @@ void usage()
 
 int main(int argc, char *argv[])
 {
+    InitModuleObjects();
     std::string modTemplateSchemaFile = std::string(hpccBuildInfo.componentDir) + PATHSEPSTR + "configschema" + PATHSEPSTR + "templates" + PATHSEPSTR + "schema" + PATHSEPSTR + "ModTemplateSchema.json";
     
     std::string processPath(queryCurrentProcessPath());

+ 5 - 8
dali/sasha/sacmd.cpp

@@ -19,8 +19,8 @@ class CSashaCommand: public CInterface, implements ISashaCommand
 {
     SashaCommandAction action;
     StringAttrArray ids;
-    CDateTime *dts;
-    unsigned numdts;
+    CDateTime *dts = nullptr;
+    unsigned numdts = 0;
 
     StringAttr after; //datetime
     StringAttr before; //datetime
@@ -43,8 +43,8 @@ class CSashaCommand: public CInterface, implements ISashaCommand
 #endif
     StringAttr xslt;
     StringAttrArray results;
-    unsigned resultsize;
-    bool resultoverflow;
+    unsigned resultsize = 0;
+    bool resultoverflow = false;
     bool online;
     bool archived;
     bool dfu;
@@ -73,13 +73,10 @@ public:
         dfu = false;
         start = 0;
         limit = 0x7fffffff;
-        resultoverflow = false;
-        resultsize = 0;
         wuservices = false;
-        dts = NULL;
-        numdts = 0;
     }
 
+    //Never called - could possibly remove serialization support
     CSashaCommand(MemoryBuffer &mb)
     {
         deserialize(mb);

+ 2 - 2
dockerfiles/README.md

@@ -39,8 +39,8 @@ Bash scripts
 buildall.sh - Used to create and publish a docker container corresponding to a github tag
 clean.sh    - Clean up old docker images (if disk gets full)
 incr.sh     - Build local images for testing (delta from a published image)
-startall.sh - Start a local k8s cluster
-stopall.sh  - Stop a local k8s cluster
+startall.sh - Start a local k8s cluster, and optionally an Elastic Stack for log processing
+stopall.sh  - Stop a local k8s cluster, and the optional Elastic Stack
 
 ---
 

+ 44 - 16
dockerfiles/startall.sh

@@ -26,7 +26,33 @@ restArgs=()
 CLUSTERNAME=mycluster
 PVFILE=$scriptdir/../helm/examples/local/hpcc-localfile/values.yaml
 
+dependency_check () {
+
+  if [ -z "$1" ]
+  then
+      CHART_SUBPATH="hpcc"
+  else
+      CHART_SUBPATH=$1
+  fi
+
+  missingDeps=0
+  while IFS= read -r line
+  do
+    echo "${line}"
+    if echo "${line}" | egrep -q 'missing$'; then
+      let "missingDeps++"
+    fi
+  done < <(helm dependency list ${scriptdir}/../helm/${CHART_SUBPATH} | grep -v WARNING)
+  if [[ ${missingDeps} -gt 0 ]]; then
+    echo "Some of the chart dependencies are missing."
+    echo "Either issue a 'helm dependency update ${scriptdir}/../helm/${CHART_SUBPATH}' to fetch them,"
+    echo "or rerun $0 with option -c to auto update them."
+    exit 0
+  fi
+}
+
 CMD="install"
+DEVELOPER_OPTIONS="--set global.privileged=true"
 while [ "$#" -gt 0 ]; do
   arg=$1
   if [[ ${arg:0:1} == '-' ]]; then
@@ -61,11 +87,17 @@ while [ "$#" -gt 0 ]; do
          echo "    -c                 Update chart dependencies"
          echo "    -p <location>      Use local persistent data"
          echo "    -pv <yamlfile>     Override dataplane definitions for local persistent data"
+         echo "    -e                 Deploy light-weight Elastic Stack for component log processing"
          exit
          ;;
       t) CMD="template"
          restArgs+="--debug"
          ;;
+      # vanilla install - for testing the system in the same way it will normally be used
+      v) DEVELOPER_OPTIONS=""
+         ;;
+      e) DEPLOY_ES=true
+         ;;
       *) restArgs+=(${arg})
          ;;
     esac
@@ -75,26 +107,14 @@ while [ "$#" -gt 0 ]; do
   shift
 done
 
+
 if [[ -n "${DEP_UPDATE_ARG}" ]]; then
   if [[ "${CMD}" = "upgrade" ]]; then
     echo "Chart dependencies cannot be updated whilst performing a helm upgrade"
     DEP_UPDATE_ARG=""
   fi
 else
-  missingDeps=0
-  while IFS= read -r line
-  do
-    echo "${line}"
-    if echo "${line}" | egrep -q 'missing$'; then
-      let "missingDeps++"
-    fi
-  done < <(helm dependency list ${scriptdir}/../helm/hpcc | grep -v WARNING)
-  if [[ ${missingDeps} -gt 0 ]]; then
-    echo "Some of the chart dependencies are missing."
-    echo "Either issue a 'helm dependency update ${scriptdir}/../helm/hpcc' to fetch them,"
-    echo "or rerun $0 with option -c to auto update them."
-    exit 0
-  fi
+  dependency_check "hpcc"
 fi
 
 [[ -n ${INPUT_DOCKER_REPO} ]] && DOCKER_REPO=${INPUT_DOCKER_REPO}
@@ -109,9 +129,17 @@ if [[ -n ${PERSIST} ]] ; then
   done
   helm ${CMD} localfile $scriptdir/../helm/examples/local/hpcc-localfile --set common.hostpath=${PERSIST} $PERSISTVALUES | tee lsfull.yaml | grep -A1000 storage: > localstorage.yaml && \
   grep "##" lsfull.yaml  && \
-  helm ${CMD} $CLUSTERNAME $scriptdir/../helm/hpcc/ --set global.image.root="${DOCKER_REPO}" --set global.image.version=$LABEL --set global.privileged=true $DEP_UPDATE_ARG ${restArgs[@]} -f localstorage.yaml
+  helm ${CMD} $CLUSTERNAME $scriptdir/../helm/hpcc/ --set global.image.root="${DOCKER_REPO}" --set global.image.version=$LABEL $DEVELOPER_OPTIONS $DEP_UPDATE_ARG ${restArgs[@]} -f localstorage.yaml
 else
-  helm ${CMD} $CLUSTERNAME $scriptdir/../helm/hpcc/ --set global.image.root="${DOCKER_REPO}" --set global.image.version=$LABEL --set global.privileged=true $DEP_UPDATE_ARG ${restArgs[@]}
+  helm ${CMD} $CLUSTERNAME $scriptdir/../helm/hpcc/ --set global.image.root="${DOCKER_REPO}" --set global.image.version=$LABEL $DEVELOPER_OPTIONS $DEP_UPDATE_ARG ${restArgs[@]}
+fi
+
+if [[ $DEPLOY_ES ]] ; then
+  echo -e "\n\nDeploying "myelastic4hpcclogs" - light-weight Elastic Stack:"
+  if [[ -z "${DEP_UPDATE_ARG}" ]]; then
+    dependency_check "managed/logging/elastic"
+  fi
+  helm ${CMD} myelastic4hpcclogs $scriptdir/../helm/managed/logging/elastic $DEP_UPDATE_ARG ${restArgs[@]}
 fi
 
 if [ ${CMD} != "template" ] ; then
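
Based on the options added above, typical invocations might look like the following sketch (flags combine as usual):

```bash
# Assumed usage of the new startall.sh options:
./startall.sh -e    # install the hpcc chart plus the myelastic4hpcclogs Elastic Stack
./startall.sh -v    # "vanilla" install: omit the developer default --set global.privileged=true
```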

+ 11 - 2
dockerfiles/stopall.sh

@@ -29,8 +29,11 @@ while [ "$#" -gt 0 ]; do
       -n) shift
          CLUSTERNAME=$1
          ;;
+      -e) UNINSTALL_ELK=1
+         ;;
      *) echo "Usage: stopall.sh [options]"
          echo "    -w  Wait for all pods to terminate"
+         echo "    -e  Uninstall light-weight Elastic Stack"
          exit
          ;;
     esac
@@ -39,8 +42,14 @@ done
 
 helm uninstall $CLUSTERNAME
 helm uninstall localfile
-kubectl delete jobs --all 
-kubectl delete networkpolicy --all 
+kubectl delete jobs --all
+kubectl delete networkpolicy --all
+if [[ $UNINSTALL_ELK == 1 ]] ; then
+  echo "Uninstalling myelastic4hpcclogs:"
+  echo "PLEASE NOTE: Elastic Search declares PVC(s) which might require explicit manual removal if no longer needed."
+  helm uninstall myelastic4hpcclogs
+  kubectl get pvc
+fi
 if [[ $wait == 1 ]] ; then
   sleep 2
   while (kubectl get pods | grep -q ^NAME) ; do
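
The matching tear-down, as a sketch; note the warning above that Elastic Search PVCs may survive the uninstall (PVC names vary per install):

```bash
# Assumed tear-down sequence for a cluster started with startall.sh -e:
./stopall.sh -e -w                      # uninstall charts including myelastic4hpcclogs, wait for pods
kubectl delete pvc <elasticsearch-pvc>  # only if the retained Elastic Search data is no longer needed
```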

+ 27 - 7
ecl/eclcc/eclcc.cpp

@@ -308,6 +308,7 @@ protected:
     void processDefinitions(EclRepositoryArray & repositories);
     void reportCompileErrors(IErrorReceiver & errorProcessor, const char * processName);
     void setDebugOption(const char * name, bool value);
+    void traceError(char const * format, ...) __attribute__((format(printf, 2, 3)));
     void usage();
 
 protected:
@@ -926,7 +927,7 @@ void EclCC::instantECL(EclCompileInstance & instance, IWorkUnit *wu, const char
                     fprintf(stdout, "Output file '%s' created\n",outputFile);
                     break;
                 case WUStateFailed:
-                    UERRLOG("Failed to create output file '%s'\n",outputFile);
+                    traceError("Failed to create output file '%s'\n",outputFile);
                     break;
                 case WUStateUploadingFiles:
                     fprintf(stdout, "Output file '%s' created, local file upload required\n",outputFile);
@@ -935,7 +936,7 @@ void EclCC::instantECL(EclCompileInstance & instance, IWorkUnit *wu, const char
                     fprintf(stdout, "No DLL/SO required\n");
                     break;
                 default:
-                    UERRLOG("Unexpected Workunit state %d\n", (int) wu->getState());
+                    traceError("Unexpected Workunit state %d\n", (int) wu->getState());
                     break;
                 }
             }
@@ -2014,7 +2015,7 @@ bool EclCC::generatePrecompiledHeader()
 {
     if (inputFiles.ordinality() != 0)
     {
-        UERRLOG("No input files should be specified when generating precompiled header");
+        traceError("No input files should be specified when generating precompiled header");
         return false;
     }
     StringArray paths;
@@ -2033,7 +2034,7 @@ bool EclCC::generatePrecompiledHeader()
     }
     if (!foundPath)
     {
-        UERRLOG("Cannot find eclinclude4.hpp");
+        traceError("Cannot find eclinclude4.hpp");
         return false;
     }
     Owned<ICppCompiler> compiler = createCompiler("precompile", foundPath, nullptr, nullptr);
@@ -2055,7 +2056,7 @@ bool EclCC::generatePrecompiledHeader()
     }
     else
     {
-        UERRLOG("Compilation failed - see %s for details", cclogFilename.str());
+        traceError("Compilation failed - see %s for details", cclogFilename.str());
         return false;
     }
 }
@@ -2178,6 +2179,25 @@ void EclCC::setDebugOption(const char * name, bool value)
 }
 
 
+void EclCC::traceError(char const * format, ...)
+{
+    va_list args;
+    va_start(args, format);
+
+    if (optXml)
+    {
+        StringBuffer msg;
+        msg.valist_appendf(format, args);
+        StringBuffer encoded;
+        encodeXML(msg.str(), encoded);
+        UERRLOG("<exception msg='%s'/>", encoded.str());
+    }
+    else
+        VALOG(MCuserError, unknownJob, format, args);
+
+    va_end(args);
+}
+
 void EclCompileInstance::checkEclVersionCompatible()
 {
     //Strange function that might modify errorProcessor...
@@ -2814,7 +2834,7 @@ int EclCC::parseCommandLineOptions(int argc, const char* argv[])
         {
             if (!checkFileExists(optIniFilename))
             {
-                UERRLOG("Error: INI file '%s' does not exist",optIniFilename.get());
+                traceError("Error: INI file '%s' does not exist",optIniFilename.get());
                 return 1;
             }
         }
@@ -2908,7 +2928,7 @@ int EclCC::parseCommandLineOptions(int argc, const char* argv[])
             //If --config has been specified, then ignore any unknown options beginning with -- since they will be added to the globals.
             if ((arg[1] == '-') && optConfig)
                 continue;
-            UERRLOG("Error: unrecognised option %s",arg);
+            traceError("Error: unrecognised option %s",arg);
             usage();
             return 1;
         }

+ 3 - 3
ecl/eclccserver/eclccserver.cpp

@@ -463,7 +463,7 @@ class EclccCompileThread : implements IPooledThread, implements IErrorReporter,
         StringBuffer eclccProgName;
         splitDirTail(queryCurrentProcessPath(), eclccProgName);
         eclccProgName.append("eclcc");
-        StringBuffer eclccCmd(" -shared");
+        StringBuffer eclccCmd(" --xml -shared");
         //Clone all the options that were passed to eclccserver (but not the filename) and also pass them to eclcc
         for (const char * * pArg = globalArgv+1; *pArg; pArg++)
             eclccCmd.append(' ').append(*pArg);
@@ -472,7 +472,7 @@ class EclccCompileThread : implements IPooledThread, implements IErrorReporter,
             eclccCmd.append(" -");
         if (mainDefinition.length())
             eclccCmd.append(" -main \"").append(mainDefinition).append("\"");
-        eclccCmd.append(" --timings --xml");
+        eclccCmd.append(" --timings");
         eclccCmd.append(" --nostdinc");
         eclccCmd.append(" --metacache=");
 
@@ -506,7 +506,7 @@ class EclccCompileThread : implements IPooledThread, implements IErrorReporter,
         }
         Owned<IPipeProcess> pipe = createPipeProcess();
         pipe->setenv("ECLCCSERVER_THREAD_INDEX", idxStr.str());
-        Owned<IPropertyTreeIterator> options = config->getElements("./Option");
+        Owned<IPropertyTreeIterator> options = config->getElements(isContainerized() ? "./options" : "./Option");
         ForEach(*options)
         {
             IPropertyTree &option = options->query();

+ 15 - 0
ecllibrary/std/File.ecl

@@ -101,6 +101,8 @@ EXPORT FsDropZone := lib_fileservices.FsDropZone;
 
 EXPORT FsDropZoneRecord := lib_fileservices.FsDropZoneRecord;
 
+EXPORT FsLandingZoneRecord := lib_fileservices.FsLandingZoneRecord;
+
 /*------------------------------------- Spray functions -----------------------------------------------------------*/
 
 /**
@@ -1049,6 +1051,19 @@ EXPORT dataset(FsDropZoneRecord) GetDropZones() :=
     lib_fileservices.FileServices.GetDropZones();
 
 /**
+ * Returns a dataset with (name, path and hostname) for all landing zones (or Drop Zones).
+ * An enhanced version of GetDropZones().
+ *
+ * @return              A dataset containing all defined Drop Zone paths.
+ *                      Will return an empty dataset if a Drop Zone
+ *                      cannot be found.
+ */
+
+EXPORT dataset(FsLandingZoneRecord) GetLandingZones() :=
+    lib_fileservices.FileServices.GetLandingZones();
+
+/**
  * Return the expire days property of the specified logical filename.
  *
  * @param lfn           The name of the logical file.

BIN
elastic4hpcclogs-0.1.0.tgz


+ 1 - 1
esp/bindings/http/platform/httpprot.cpp

@@ -317,7 +317,7 @@ bool CSecureHttpProtocol::notifySelected(ISocket *sock,unsigned selected, IPersi
             {
                 char peername[256];
                 int port = accepted->peer_name(peername, 256);
-                DBGLOG("HTTPS connection from %s:%d on %s socket", peername, port, persistentHandler?"persistent":"new");
+                ESPLOG(LogMax, "HTTPS connection from %s:%d on %s socket", peername, port, persistentHandler?"persistent":"new");
                 if(m_ssctx != NULL)
                 {
                     if(m_maxConcurrentThreads > 0)

+ 1 - 1
esp/bindings/http/platform/httptransport.cpp

@@ -1996,7 +1996,7 @@ int CHttpRequest::processHeaders(IMultiException *me)
     if(m_content_length > 0 && m_MaxRequestEntityLength > 0 && m_content_length > m_MaxRequestEntityLength && (!isUpload(false)))
     {
         UERRLOG("Bad request: Content-Length exceeded maxRequestEntityLength");
-        throw createEspHttpException(HTTP_STATUS_BAD_REQUEST_CODE, "The request length was too long.", HTTP_STATUS_BAD_REQUEST);
+        throw createEspHttpException(HTTP_STATUS_REQUEST_ENTITY_TOO_LARGE_CODE, "The request length was too long.", HTTP_STATUS_REQUEST_ENTITY_TOO_LARGE);
     }
     setPersistentEligible(checkPersistentEligible());
 

+ 1 - 0
esp/bindings/http/platform/httptransport.hpp

@@ -39,6 +39,7 @@
 #define HTTP_STATUS_FORBIDDEN               "403 Forbidden"
 #define HTTP_STATUS_NOT_FOUND               "404 Not Found"
 #define HTTP_STATUS_NOT_ALLOWED             "405 Method Not Allowed"
+#define HTTP_STATUS_REQUEST_ENTITY_TOO_LARGE   "413 Request Entity Too Large"
 #define HTTP_STATUS_INTERNAL_SERVER_ERROR   "500 Internal Server Error"
 #define HTTP_STATUS_NOT_IMPLEMENTED         "501 Not Implemented"
 

+ 1 - 1
esp/scm/ws_workunits.ecm

@@ -25,7 +25,7 @@ EspInclude(ws_workunits_queryset_req_resp);
 
 ESPservice [
     auth_feature("DEFERRED"), //This declares that the method logic handles feature level authorization
-    version("1.82"), default_client_version("1.82"), cache_group("ESPWsWUs"),
+    version("1.83"), default_client_version("1.83"), cache_group("ESPWsWUs"),
     noforms,exceptions_inline("./smc_xslt/exceptions.xslt"),use_method_name] WsWorkunits
 {
     ESPmethod [cache_seconds(60), resp_xsl_default("/esp/xslt/workunits.xslt")]     WUQuery(WUQueryRequest, WUQueryResponse);

+ 1 - 0
esp/scm/ws_workunits_queryset_req_resp.ecm

@@ -293,6 +293,7 @@ ESPresponse [exceptions_inline] WUQueryDetailsResponse
     [min_ver("1.44")] ESParray<ESPstruct QuerySuperFile, SuperFile> SuperFiles;
     [min_ver("1.46")] bool IsLibrary;
     [min_ver("1.46")] string Priority;
+    [min_ver("1.83")] int PriorityID;
     [min_ver("1.46")] string WUSnapShot; //Label
     [min_ver("1.46")] string CompileTime;
     [min_ver("1.46")] ESParray<string> LibrariesUsed;

+ 1 - 0
esp/scm/ws_workunits_struct.ecm

@@ -589,6 +589,7 @@ ESPStruct [nil_remove] QuerySetQuery
     [min_ver("1.46")] bool Activated;
     [min_ver("1.46")] string PublishedBy;
     [min_ver("1.48")] string snapshot;
+    [min_ver("1.83")] int PriorityID;
 };
 
 ESPStruct QuerySetAlias

+ 17 - 7
esp/services/ws_workunits/ws_workunitsQuerySets.cpp

@@ -1028,8 +1028,21 @@ void addClusterQueryStates(IPropertyTree* queriesOnCluster, const char *target,
     clusterStates.append(*clusterState.getClear());
 }
 
+template<typename T>
+void checkAndSetQueryPriority(double version, IPropertyTree *query, T *ret)
+{
+    if (!query->hasProp("@priority"))
+        return;
+
+    int priorityID = query->getPropInt("@priority");
+    ret->setPriority(getQueryPriorityName(priorityID));
+    if (version >= 1.83)
+        ret->setPriorityID(priorityID);
+}
+
 void gatherQuerySetQueryDetails(IEspContext &context, IPropertyTree *query, IEspQuerySetQuery *queryInfo, const char *cluster, IPropertyTree *queriesOnCluster)
 {
+    double version = context.getClientVersion();
     queryInfo->setId(query->queryProp("@id"));
     queryInfo->setName(query->queryProp("@name"));
     queryInfo->setDll(query->queryProp("@dll"));
@@ -1045,13 +1058,11 @@ void gatherQuerySetQueryDetails(IEspContext &context, IPropertyTree *query, IEsp
         queryInfo->setTimeLimit(query->getPropInt("@timeLimit"));
     if (query->hasProp("@warnTimeLimit"))
         queryInfo->setWarnTimeLimit(query->getPropInt("@warnTimeLimit"));
-    if (query->hasProp("@priority"))
-        queryInfo->setPriority(getQueryPriorityName(query->getPropInt("@priority")));
+    checkAndSetQueryPriority(version, query, queryInfo);
     if (query->hasProp("@comment"))
         queryInfo->setComment(query->queryProp("@comment"));
     if (query->hasProp("@snapshot"))
         queryInfo->setSnapshot(query->queryProp("@snapshot"));
-    double version = context.getClientVersion();
     if (version >= 1.46)
     {
         queryInfo->setPublishedBy(query->queryProp("@publishedBy"));
@@ -1662,6 +1673,7 @@ bool CWsWorkunitsEx::onWUListQueries(IEspContext &context, IEspWUListQueriesRequ
         q->setWuid(query.queryProp("@wuid"));
         q->setActivated(query.getPropBool("@activated", false));
         q->setSuspended(query.getPropBool("@suspended", false));
+
         if (query.hasProp("@memoryLimit"))
         {
             StringBuffer s;
@@ -1672,8 +1684,7 @@ bool CWsWorkunitsEx::onWUListQueries(IEspContext &context, IEspWUListQueriesRequ
             q->setTimeLimit(query.getPropInt("@timeLimit"));
         if (query.hasProp("@warnTimeLimit"))
             q->setWarnTimeLimit(query.getPropInt("@warnTimeLimit"));
-        if (query.hasProp("@priority"))
-            q->setPriority(getQueryPriorityName(query.getPropInt("@priority")));
+        checkAndSetQueryPriority(version, &query, q.get());
         if (query.hasProp("@comment"))
             q->setComment(query.queryProp("@comment"));
         if (version >= 1.46)
@@ -2101,8 +2112,7 @@ void CWsWorkunitsEx::getWUQueryDetails(IEspContext &context, CWUQueryDetailsReq
     double version = context.getClientVersion();
     if (version >= 1.46)
     {
-        if (query->hasProp("@priority"))
-            resp.setPriority(getQueryPriorityName(query->getPropInt("@priority")));
+        checkAndSetQueryPriority(version, query, &resp);
         resp.setIsLibrary(query->getPropBool("@isLibrary"));
 
         if (version < 1.64)

+ 1 - 1
esp/src/eclwatch/LZBrowseWidget.js

@@ -141,7 +141,7 @@ define([
 
             this.dropZoneTarget2Select.on("change", function (evt) {
                 if (evt) {
-                    context.serverFilterSelect.loadDropZoneMachines(evt);
+                    context.serverFilterSelect.loadDropZoneMachines(evt, true);
                 }
             });
         },

+ 10 - 1
esp/src/eclwatch/QuerySetQueryWidget.js

@@ -397,6 +397,13 @@ define([
                             return "<a href='#' onClick='return false;' class='dgrid-row-url'>" + Id + "</a>";
                         }
                     },
+                    priority: {
+                        label: this.i18n.Priority,
+                        width: 80,
+                        formatter: function (priority, idx) {
+                            return priority === undefined ? "" : priority;
+                        }
+                    },
                     Name: {
                         label: this.i18n.Name
                     },
@@ -650,7 +657,9 @@ define([
             }
             var optionsForm = registry.byId(this.id + "OptionsForm");
             var optionsValues = optionsForm.getValues();
-            return lang.mixin(this.filter.toObject(), optionsValues);
+            const retVal = lang.mixin(this.filter.toObject(), optionsValues);
+            retVal.PriorityHigh = retVal.PriorityLow;
+            return retVal;
         },
 
         ensurePane: function (id, params, workunitTab) {

+ 2 - 2
esp/src/eclwatch/TargetSelectClass.js

@@ -328,7 +328,7 @@ define([
             });
         },
 
-        loadDropZoneMachines: function (Name) {
+        loadDropZoneMachines: function (Name, useConfig) {
             var context = this;
             this.set("disabled", true);
             if (Name) {
@@ -349,7 +349,7 @@ define([
                             for (var i = 0; i < targetData.length; ++i) {
                                 context.options.push({
                                     label: targetData[i].Netaddress,
-                                    value: targetData[i].Netaddress
+                                    value: useConfig ? targetData[i].ConfigNetaddress : targetData[i].Netaddress
                                 });
                             }
                             context._postLoad();

+ 5 - 6
esp/src/eclwatch/TimingPageWidget.js

@@ -1,6 +1,7 @@
 define([
     "dojo/_base/declare",
     "src/nlsHPCC",
+    "src/Memory",
     "dojo/store/Observable",
 
     "dijit/registry",
@@ -28,7 +29,7 @@ define([
     "dijit/form/DropDownButton",
     "dijit/TooltipDialog"
 
-], function (declare, nlsHPCCMod, Observable,
+], function (declare, nlsHPCCMod, MemoryMod, Observable,
     registry,
     _TabContainerWidget, ESPWorkunit, DelayLoadWidget, ESPUtil, srcTimings,
     hpccComms,
@@ -118,13 +119,11 @@ define([
                 context.refreshGrid();
             };
 
-            var store = new ESPUtil.UndefinedMemory({
-                idProperty: "__hpcc_id",
-                data: []
-            });
+            var store = new MemoryMod.AlphaNumSortMemory("__hpcc_id", { Scope: true });
             this.store = new Observable(store);
             this.grid = new declare([ESPUtil.Grid(false, true)])({
-                store: this.store
+                store: this.store,
+                sort: "__hpcc_id"
             }, this.id + "Grid");
             this.grid.on(".dgrid-row-url:click", function (evt) {
                 var row = context.grid.row(evt).data;

+ 6 - 0
esp/src/eclwatch/templates/QuerySetQueryWidget.html

@@ -34,6 +34,12 @@
                         <input id="${id}QueryName" title="${i18n.Name}:" name="QueryName" colspan="2"
                             data-dojo-props="trim: true, placeHolder: '${i18n.QueryNamePlaceholder}'"
                             data-dojo-type="dijit.form.TextBox" />
+                        <select id="${id}Priority" title="${i18n.Priority}:" name="PriorityLow" colspan="2" data-dojo-type="dijit.form.Select">
+                           <option value="" selected="selected">${i18n.None}</option>
+                           <option value="0">${i18n.Low}</option>
+                           <option value="1">${i18n.High}</option>
+                           <option value="2">${i18n.SLA}</option>
+                        </select>
                         <input id="${id}PublishedBy" title="${i18n.PublishedBy}:" name="PublishedBy" colspan="2"
                             data-dojo-props="trim: true, placeHolder: '${i18n.PublishedBy}'"
                             data-dojo-type="dijit.form.TextBox" />

+ 1 - 2
esp/test/httptest/httptest.cpp

@@ -1297,8 +1297,7 @@ void SplitURL(const char* url, StringBuffer& protocol,StringBuffer& UserName,Str
         UserName.append(username);
     }
 
-    if(hostptr)
-        host.append(hostptr);
+    host.append(hostptr);
 
     if(portptr)
         port.append(portptr);

+ 4 - 4
esp/test/httptest/httptest.hpp

@@ -64,11 +64,11 @@ private:
     int          m_threads;
     int          m_times;
     StringBuffer m_host;
-    int          m_port;
-    FILE*        m_ofile;
-    bool         m_use_ssl;
+    int          m_port = 0;
+    FILE*        m_ofile = nullptr;
+    bool         m_use_ssl = false;
     Owned<ISecureSocketContext> m_ssctx;
-    int          m_delay;
+    int          m_delay = 0;
     HttpStat     m_stat;
 
 public:

+ 3 - 23
helm/examples/azure/hpcc-azurefile/templates/NOTES.txt

@@ -7,35 +7,15 @@ storage:
   - name: {{ $plane.name }}
     pvc: {{ printf "%s-%s-pvc" $plane.name (include "hpcc-azurefile.fullname" $) }}
     prefix: {{ printf "%s%s%s" $.Values.common.mountPrefix (regexMatch ".*/$" $.Values.common.mountPrefix | ternary "" "/") $plane.subPath | quote }}
-{{- if hasKey $plane "labels" }}
-    labels:
-{{ toYaml $plane.labels | indent 4 }}
-{{ end -}}
+    category: {{ $plane.category }}
 {{- end }}
 
 {{ range $plane := .Values.planes -}}
-{{- if eq "dali" $plane.name }}
-  daliStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "dll" $plane.name }}
-  dllStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "data" $plane.name }}
-  dataStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{ end -}}
-
-{{ range $plane := .Values.planes -}}
 {{ if eq "sasha" $plane.name }}
 sasha:
   wu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
   dfuwu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
 {{ end -}}
 {{- end }}

+ 3 - 23
helm/examples/efs/hpcc-efs/templates/NOTES.txt

@@ -7,35 +7,15 @@ storage:
   - name: {{ $plane.name }}
     pvc: {{ printf "%s-%s-pvc" $plane.name (include "hpcc-efs.fullname" $) }}
     prefix: {{ printf "%s%s%s" $.Values.common.mountPrefix (regexMatch ".*/$" $.Values.common.mountPrefix | ternary "" "/") $plane.subPath | quote }}
-{{- if hasKey $plane "labels" }}
-    labels:
-{{ toYaml $plane.labels | indent 4 }}
-{{ end -}}
+    category: {{ $plane.category }}
 {{- end }}
 
 {{ range $plane := .Values.planes -}}
-{{- if eq "dali" $plane.name }}
-  daliStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "dll" $plane.name }}
-  dllStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "data" $plane.name }}
-  dataStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{ end -}}
-
-{{ range $plane := .Values.planes -}}
 {{ if eq "sasha" $plane.name }}
 sasha:
   wu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
   dfuwu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
 {{ end -}}
 {{- end }}

+ 3 - 23
helm/examples/filestore/hpcc-filestore/templates/NOTES.txt

@@ -7,35 +7,15 @@ storage:
   - name: {{ $plane.name }}
     pvc: {{ printf "%s-%s-pvc" $plane.name (include "hpcc-filestore.fullname" $) }}
     prefix: {{ printf "%s%s%s" $.Values.common.mountPrefix (regexMatch ".*/$" $.Values.common.mountPrefix | ternary "" "/") $plane.subPath | quote }}
-{{- if hasKey $plane "labels" }}
-    labels:
-{{ toYaml $plane.labels | indent 4 }}
-{{ end -}}
+    category: {{ $plane.category }}
 {{- end }}
 
 {{ range $plane := .Values.planes -}}
-{{- if eq "dali" $plane.name }}
-  daliStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "dll" $plane.name }}
-  dllStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "data" $plane.name }}
-  dataStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{ end -}}
-
-{{ range $plane := .Values.planes -}}
 {{ if eq "sasha" $plane.name }}
 sasha:
   wu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
   dfuwu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
 {{ end -}}
 {{- end }}

+ 2 - 19
helm/examples/local/hpcc-localfile/templates/NOTES.txt

@@ -15,28 +15,11 @@ storage:
 {{- end }}
 
 {{ range $plane := .Values.planes -}}
-{{- if eq "dali" $plane.name }}
-  daliStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "dll" $plane.name }}
-  dllStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{- if eq "data" $plane.name }}
-  dataStorage:
-    plane: {{ $plane.name }}
-{{ end -}}
-{{ end -}}
-
-{{ range $plane := .Values.planes -}}
 {{ if eq "sasha" $plane.name }}
 sasha:
   wu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
   dfuwu-archiver:
-    storage:
-      plane: {{ $plane.name }}
+    plane: {{ $plane.name }}
 {{ end -}}
 {{- end }}

+ 22 - 0
helm/examples/metrics/README.md

@@ -0,0 +1,22 @@
+# HPCC Metrics
+HPCC components collect metrics describing overall component health and state. In order to use component metrics for
+activities such as scaling and alerting, the collected metrics must be reported to a collection system such as
+Prometheus or Elasticsearch.
+
+## Enabling Metric Reporting
+To enable reporting of metrics to a collection system, add metric configuration settings to the helm chart. The
+configuration includes the collection system in use and the settings necessary to report metrics.
+
+The supplied HPCC helm chart exposes all global settings from its values.yml file to all components. To enable
+metrics reporting, either include the metrics configuration as a permanent part of your HPCC helm chart values.yml
+file, or add it as command line settings at chart installation. Since the configuration consists of multiple settings,
+if enabling metrics via the command line, use the _-f filename_ option for convenience.
+
+This directory contains examples of configuration settings for HPCC provided collection systems. See each file for
+a description of its available settings. Use the file as is, or customize for your installation.
+
+For example, the following adds the Prometheus configuration as the collection system.
+
+```bash
+helm install mycluster ./hpcc -f <path>/prometheus_metrics.yaml
+```

+ 19 - 0
helm/examples/metrics/filesink.yaml

@@ -0,0 +1,19 @@
+#
+# Defines a file sink for accepting metric reports.
+# Settings:
+#   type              - type of sink (must be file for the file sink)
+#   name              - name for the sink instance
+#   settings.filename - name of the file where metrics are to be written
+#   settings.clear    - true to clear the file when metric reporting begins
+#                       false to append to an existing file
+#   settings.period   - period in seconds between collections
+
+global:
+  metrics:
+    sinks:
+    - type: file
+      name: filesink
+      settings:
+        filename: testout.txt
+        clear: true
+        period: 5

+ 13 - 0
helm/examples/metrics/loggingsink.yaml

@@ -0,0 +1,13 @@
+#
+# Defines a logging sink that writes metric reports to the component defined log
+# Settings:
+#   type             - sink type (must be log for the logging sink)
+#   name             - name for the sink instance
+#   settings.period  - period in seconds between collections
+global:
+  metrics:
+    sinks:
+    - type: log
+      name: loggingsink
+      settings:
+        period: 60

+ 17 - 0
helm/examples/metrics/prometheus_metrics.yaml

@@ -0,0 +1,17 @@
+#
+# Defines a prometheus sink that responds to scraping requests
+# Settings:
+#   type                  - sink type (must be prometheus for prometheus support)
+#   name                  - name for the sink instance
+#   settings.port         - port number on which to listen for scrape requests
+#   settings.serviceName  - local service name for the prometheus HTTP service
+#   settings.verbose      - enables verbose reports
+global:
+  metrics:
+    sinks:
+    - type: prometheus
+      name: prometheussink
+      settings:
+        port: 8767
+        serviceName: metrics
+        verbose: true
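
A quick way to check the sink after installation, as a sketch; it assumes the sink exposes a Service named after `serviceName` above and serves Prometheus text format on `/metrics`, neither of which is confirmed by this diff:

```bash
# Sketch: forward the assumed "metrics" Service locally and scrape it once.
kubectl port-forward service/metrics 8767:8767 &
curl -s http://localhost:8767/metrics    # path /metrics is an assumption
```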

+ 4 - 28
helm/examples/nfs/hpcc-nfs/templates/NOTES.txt

@@ -7,40 +7,16 @@ storage:
   - name: {{ $plane.name }}
     pvc: {{ printf "%s-%s-pvc" $plane.name (include "hpcc-nfs.fullname" $) }}
     prefix: {{ printf "%s%s%s" $.Values.common.mountPrefix (regexMatch ".*/$" $.Values.common.mountPrefix | ternary "" "/") $plane.subPath | quote }}
-{{- if hasKey $plane "labels" }}
-    labels:
-{{ toYaml $plane.labels | indent 4 }}
-{{ end -}}
-{{- end }}
-
-{{ range $plane := .Values.planes -}}
-{{- if eq "dali" $plane.name }}
-  daliStorage:
-    plane: {{ $plane.name }}
-    forcePermissions: true
-{{ end -}}
-{{- if eq "dll" $plane.name }}
-  dllStorage:
-    plane: {{ $plane.name }}
+    category: {{ $plane.category }}
     forcePermissions: true
-{{ end -}}
-{{- if eq "data" $plane.name }}
-  dataStorage:
-    plane: {{ $plane.name }}
-    forcePermissions: true
-{{ end -}}
-{{ end -}}
+{{- end }}
 
 {{ range $plane := .Values.planes -}}
 {{ if eq "sasha" $plane.name }}
 sasha:
   wu-archiver:
-    storage:
-      plane: {{ $plane.name }}
-      forcePermissions: true
+    plane: {{ $plane.name }}
   dfuwu-archiver:
-    storage:
-      plane: {{ $plane.name }}
-      forcePermissions: true
+    plane: {{ $plane.name }}
 {{ end -}}
 {{- end }}

+ 1 - 1
helm/hpcc/docs/changes.md

@@ -206,4 +206,4 @@ This is now simplified to:
     plane: sasha
 ```
 
-A value of "" can be used mean use the 1st plane with the sasha catagory.
+A value of "" can be used to mean use the 1st plane with the sasha category.

File diff suppressed because it is too large
+ 191 - 0
helm/hpcc/docs/storage.md


+ 4 - 0
helm/hpcc/templates/NOTES.txt

@@ -1,6 +1,10 @@
 {{- $defaultImage :=  include "hpcc.imageName" (dict "root" $ "me" dict ) | quote }}
+{{- $defaultVersion :=  .Values.global.image.version | default .Chart.Version }}
 Thank you for installing the HPCC chart version {{ .Chart.Version }} using image {{ $defaultImage }}
 
+{{ if (ne $defaultVersion .Chart.Version) }}
+ {{- printf "**** WARNING: The expected image version for this helm chart is %s ****" .Chart.Version }}
+{{- end }}
 {{/* Gather a list of ephemeral and persistant planes */}}
 {{- $storage := (.Values.storage | default dict) -}}
 {{- $match := dict "ephemeral" (list) "persistant" (list) -}}

+ 15 - 1
helm/hpcc/templates/_helpers.tpl

@@ -129,6 +129,17 @@ Get default dll plane
 {{- end -}}
 
 {{/*
+Returns the largest number of workers from all the thors
+*/}}
+{{- define "hpcc.getMaxNumWorkers" -}}
+ {{- $maxNumWorkers := 1 -}}
+ {{- range $thor := .Values.thor -}}
+  {{- $maxNumWorkers = max $maxNumWorkers $thor.numWorkers -}}
+ {{- end -}}
+ {{- $maxNumWorkers -}}
+{{- end -}}
+
+{{/*
 Generate global ConfigMap info
 Pass in root as .
 */}}
@@ -162,7 +173,10 @@ storage:
   {{- if $plane.subPath -}}
    {{- $_ := set $planeYaml "prefix" (printf "%s/%s" $planeYaml.prefix $plane.subPath) -}}
   {{- end -}}
-    {{- toYaml $planeYaml | nindent 4 }}
+  {{- if and (eq "data" $plane.category) (not $plane.defaultSprayParts) -}}
+   {{- $_ := set $planeYaml "defaultSprayParts" (include "hpcc.getMaxNumWorkers" $ | int) -}}
+  {{- end -}}
+  {{- toYaml $planeYaml | nindent 4 }}
  {{- end }}
 {{- end }}
 {{- if not (include "hpcc.hasPlaneForCategory" (dict "root" $ "category" "spill")) }}
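
One way to confirm the derived `defaultSprayParts` after this change, as a sketch (chart path and release name assumed):

```bash
# Render the templates and inspect the computed value for each data plane:
helm template mycluster ./hpcc | grep defaultSprayParts
```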

+ 23 - 0
helm/hpcc/values.schema.json

@@ -572,6 +572,25 @@
       },
       "additionalProperties": { "type": ["integer", "string", "boolean"] }
     },
+    "compileOption": {
+      "type": "object",
+      "properties": {
+        "name": {
+          "type": "string",
+          "description": "Compiler option name"
+        },
+        "value": {
+          "type": ["number", "string", "boolean"],
+          "description": "Compiler option  value"
+        },
+        "cluster": {
+          "type": "string",
+          "description": "Cluster to apply option to"
+        }
+      },
+      "required": [ "name", "value" ],
+      "additionalProperties": false
+    },
     "env": {
       "type": "array",
       "items": {
@@ -689,6 +708,10 @@
           "type": "object",
           "additionalProperties": { "type": "string" }
         },
+        "options": {
+          "type": "array",
+          "items": { "$ref": "#/definitions/compileOption" }
+        },
         "resources": {
           "$ref": "#/definitions/resources"
         }

+ 19 - 21
helm/hpcc/values.yaml

@@ -53,15 +53,6 @@ global:
   # Set postJobCommandViaSidecar to true, if the command needs to run with privilege, this will enable the command
   # to run as root in a sidecar in same process space as other containers, allowing it to for example send signals
   # to processes in sidecars
-
-  # Define global default metrics configuration for all components
-  metrics:
-    sinks:
-      - type: log
-        name: logging
-        settings:
-          period: 60
-
   # misc:
   #   postJobCommand: "curl -sf -XPOST http://127.0.0.1:15020/quitquitquit"
   # Or example for linkerd
@@ -121,24 +112,25 @@ storage:
   planes:
   #   name: <required>
   #   prefix: <path>                        # Root directory for accessing the plane (if pvc defined), or url to access plane.
-  #   subPath: <relative-path>              # Optional sub directory within <prefix> to use as the root directory
-  #   pvc: <name>                           # The name of the persistant volume claim
-  #   numDevices: 1                         # number of devices that are part of the plane
-  #   replication: nullptr                  # a list or single item indicating which planes the data should be replicated onto
-  #   includeDeviceInPath: false            # Is the device number appended to the mount for the physical path for a file?  (Only required in unusual situations)
-  #   hosts: <name>                         # Name of the host group for bare metal - must match the name of the storage plane..
-  #   secret: <secret-id>                   # what secret is required to access the files.  This could optionally become a list if required (or add secrets:).
-  #   defaultSprayParts: 4                  # The number of partitions created when spraying (default: 1)
-  #   cost:                                 # The storage cost
-  #     storageAtRest: 0.113                # Storage at rest cost: cost per GiB/month
-  #   options:                              # not sure if it is needed
+  #   category: data|dali|lz|dll|spill|temp # What category of data is stored on this plane?
   #
   # For dynamic pvc creation:
   #   storageClass: ''
   #   storageSize: 1Gi
-  #   storageMode: "ReadWriteOnce"|"ReadWriteMany"
   #
+  # For persistent storage:
+  #   pvc: <name>                           # The name of the persistent volume claim
+  #   forcePermissions: false
+  #   hosts: [ <host-list> ]                # Inline list of hosts
+  #   hostGroup: <name>                     # Name of the host group for bare metal - must match the name of the storage plane.
+  #
+  # Other options:
+  #   subPath: <relative-path>              # Optional sub directory within <prefix> to use as the root directory
+  #   numDevices: 1                         # number of devices that are part of the plane
+  #   secret: <secret-id>                   # what secret is required to access the files.  This could optionally become a list if required (or add secrets:).
+  #   defaultSprayParts: 4                  # The number of partitions created when spraying (default: 1)
+  #   cost:                                 # The storage cost
+  #     storageAtRest: 0.113                # Storage at rest cost: cost per GiB/month
 
   - name: dali
     storageClass: ""
@@ -390,6 +382,12 @@ eclccserver:
   maxActive: 4
   ## Specify a list of queues to listen on if you don't want this eclccserver listening on all queues. If empty or missing, listens on all queues
   listen: []
+  ## The following allows eclcc options (names start with a -) and debug options to be defined for each of the workunits that are compiled.
+  #options:
+  #- name: globalAutoHoist
+  #  value: false
+  #  cluster: name   # optional cluster this is applied to
+
   ## The following resources apply to child compile pods when useChildProcesses=false, otherwise they apply to eclccserver pod.
   #resources:
   #  cpu: "1"

+ 7 - 4
helm/managed/logging/elastic/Chart.yaml

@@ -5,7 +5,10 @@ type: application
 
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
-version: 0.1.0
+version: 1.0.0
+
+# Elastic Stack version
+appVersion: 7.12.0
 
 # Dependencies can be automatically updated via the HELM dependency update command:
 # > 'helm dependency update' command
@@ -13,12 +16,12 @@ version: 0.1.0
 # > helm install myelastic ./ --dependency-update
 dependencies:
 - name: filebeat
-  version: 7.9.3
+  version: 7.12.0
   repository: https://helm.elastic.co
 - name: elasticsearch
-  version: 7.9.3
+  version: 7.12.0
   repository: https://helm.elastic.co
 - name: kibana # Optional managed logging processor front-end
-  version: 7.9.3
+  version: 7.12.0
   repository: https://helm.elastic.co
   condition: kibana.enabled

+ 93 - 3
helm/managed/logging/elastic/README.md

@@ -1,6 +1,12 @@
-## This folder contains lightweigth Elastic Stack deployment charts and HPCC prefered values
+## This folder contains a lightweight Elastic Stack deployment chart and HPCC Systems preferred values
 
 This chart describes a local, minimal Elastic Stack instance for HPCC Systems component log processing.
+Once successfully deployed, HPCC component logs produced within the same namespace should be automatically indexed
+on the Elastic Search endpoint. Users can query those logs by issuing Elastic Search RESTful API queries, or via
+the Kibana UI (after creating a simple index pattern).
+
+Out of the box, Filebeat forwards the HPCC component logs to a generically named index: 'filebeat-<FB_VER>-<DateStamp>', and specifically into a field labeled 'message'. It also aggregates k8s, Docker, and system metadata to
+help the user query the log entries of interest.
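+
+As a quick illustration (a sketch assuming the in-cluster default endpoint elasticsearch-master:9200 and the default index pattern; adjust both to your deployment), recent entries can be queried directly via the RESTful API:
+> curl 'http://elasticsearch-master:9200/filebeat-*/_search?q=message:error&size=10'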
 
 ### Dependencies
 This chart is dependent on the Elastic Stack Helm charts for ElasticSearch, Filebeats and Kibana.
@@ -8,12 +14,96 @@ This chart is dependent on the Elastic Stack Helm charts for ElasticSearch, File
 #### Dependency update
 ##### HELM Command
 Helm provides a convenient command to automatically pull appropriate dependencies to the /charts directory:
-> helm dependency update
+> helm dependency update <HPCC-Systems Git clone location>/helm/managed/logging/elastic/
 
 ##### HELM Install parameter
 Otherwise, provide the "--dependency-update" argument in the helm install command
 For example:
-> helm install myelastic ./ --dependency-update
+> helm install myelastic <HPCC-Systems Git clone location>/helm/managed/logging/elastic/ --dependency-update
+
+##### Log Query Improvements
+Users are encouraged to make use of Elastic Search "Ingest Pipelines" to improve log query performance and tailor the structure of the log record to meet their needs. The pipelines can be applied to specific indices via the Elastic Search API, or via Kibana's UI.
+
+> further reading here: https://www.elastic.co/blog/structuring-elasticsearch-data-with-grok-on-ingest-for-faster-analytics
+
+The following example creates a pipeline named 'hpccpipeline' via the Elastic Search API. Alternatively, the processors can be added via Kibana's "Ingest Node Pipelines".
+
+```JSON
+PUT _ingest/pipeline/hpccpipeline
+{
+    "processors" : [
+      {
+        "grok" : {
+          "field" : "message",
+          "patterns" : [
+            """%{BASE16NUM:hpcc.log.sequence}\s+(%{WORD:hpcc.log.audience})\s+%{WORD:hpcc.log.class}\s+%{TIMESTAMP_ISO8601:hpcc.log.timestamp}\s+%{POSINT:hpcc.log.procid\s+%{POSINT:hpcc.log.threadid}\s+%{WORD:hpcc.log.jobid}\s+%{QUOTEDSTRING:hpcc.log.message}""",
+            """%{BASE16NUM:hpcc.log.sequence}\s+%{WORD:hpcc.log.audience}\s+%{WORD:hpcc.log.class}\s%{TIMESTAMP_ISO8601:hpcc.log.timestamp}\s+%{POSINT:hpcc.log.procid}\s+%{POSINT:hpcc.log.threadid}\s+%{WORD:hpcc.log.jobid}\s+%{GREEDYDATA:hpcc.log.message}"""
+          ]
+        }
+      }
+    ],
+    "on_failure" : [
+      {
+        "set" : {
+          "field" : "error.message",
+          "value" : "{{ _ingest.on_failure_message }}"
+        }
+      }
+    ]
+}
+```
+
+Once you've verified your request was applied successfully, the target Elastic Search index should be associated with this pipeline. In the managed Elastic Stack values.yaml file, set "filebeat.pipeline.filebeatConfig.filebeat.yml.output.elasticsearch.pipeline" to 'hpccpipeline':
+
+```yaml
+filebeat:
+  description: "HPCC Managed filebeat"
+  filebeatConfig:
+    filebeat.yml: |
+      filebeat.inputs:
+      - type: container
+        paths:
+          - /var/log/containers/esdl-sandbox-*.log
+          - /var/log/containers/eclwatch-*.log
+          - /var/log/containers/mydali-*.log
+          - /var/log/containers/eclqueries-*.log
+          - /var/log/containers/sql2ecl-*.log
+          - /var/log/containers/eclservices-*.log
+          - /var/log/containers/dfuserver-*.log
+          - /var/log/containers/eclscheduler-*.log
+          - /var/log/containers/hthor-*.log
+          - /var/log/containers/myeclccserver-*.log
+          - /var/log/containers/roxie-*.log
+          - /var/log/containers/sasha-*.log
+          - /var/log/containers/thor-*.log
+        processors:
+        - add_kubernetes_metadata:
+            host: ${NODE_NAME}
+            matchers:
+            - logs_path:
+                logs_path: "/var/log/containers/"
+      output.elasticsearch:
+        host: '${NODE_NAME}'
+        hosts: '${ELASTICSEARCH_HOSTS:elasticsearch-master:9200}'
+        pipeline: 'hpccpipeline'
+```
+
+Warning: if the named pipeline is not found, the filebeat component might not deploy successfully.
+
+The targeted index should now contain a series of "hpcc.log.*" fields which can be used to query and extract specific HPCC component log data.
+
+For example, to search for all errors reported by HPCC components:
+
+```json
+{
+  "query": {
+    "match_phrase": {
+      "hpcc.log.class": "ERR"
+    }
+  }
+}
+```
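+
+As a sketch (again assuming the in-cluster default endpoint; adjust the index pattern to match your deployment), the query body above can be submitted via the _search API:
+> curl -H 'Content-Type: application/json' 'http://elasticsearch-master:9200/filebeat-*/_search' -d '{"query":{"match_phrase":{"hpcc.log.class":"ERR"}}}'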
+
 
 ##### Cleanup
 The Elastic Search chart will declare a PVC which is used to persist data related to its indexes, and thus, the HPCC component logs. PVCs by nature can outlive the HPCC and Elastic deployments; it is up to the user to manage them appropriately, which includes deleting a PVC when it is no longer needed.
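+
+For example (PVC names vary by release; these are generic kubectl commands, not chart output):
+> kubectl get pvc
+> kubectl delete pvc <pvc-name>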

+ 39 - 16
helm/managed/logging/elastic/values.yaml

@@ -29,22 +29,45 @@ filebeat:
   labels: {"managedby" : "HPCC"}
   ## Allows you to add any config files in /usr/share/filebeat
   ## such as filebeat.yml
-  #filebeatConfig:
-    #filebeat.yml: |
-      #filebeat.inputs:
-      #- type: container
-      #  paths:
-      #    - /var/log/containers/*.log
-        #processors:
-        #- add_kubernetes_metadata:
-            #host: ${NODE_NAME}
-            #matchers:
-            #- logs_path:
-                #logs_path: "/var/log/containers/"
-      #output.elasticsearch:
-      #  host: '${NODE_NAME}'
-      #  hosts: '${ELASTICSEARCH_HOSTS:elasticsearch-master:9200}'
-      #  index: "hpcc-%{[fields.log_type]}-%{[agent.version]}-%{+yyyy.MM.dd}"
+  filebeatConfig:
+    filebeat.yml: |
+      filebeat.inputs:
+      - type: container
+        paths:
+          - /var/log/containers/esdl-sandbox-*.log
+          - /var/log/containers/eclwatch-*.log
+          - /var/log/containers/mydali-*.log
+          - /var/log/containers/eclqueries-*.log
+          - /var/log/containers/sql2ecl-*.log
+          - /var/log/containers/eclservices-*.log
+          - /var/log/containers/dfuserver-*.log
+          - /var/log/containers/eclscheduler-*.log
+          - /var/log/containers/hthor-*.log
+          - /var/log/containers/myeclccserver-*.log
+          - /var/log/containers/roxie-*.log
+          - /var/log/containers/sasha-*.log
+          - /var/log/containers/thor-*.log
+        #exclude_files: ['(myelk-kibana|myelk-filebeat)+(.*).log']
+        processors:
+        - add_kubernetes_metadata:
+            host: ${NODE_NAME}
+            matchers:
+            - logs_path:
+                logs_path: "/var/log/containers/"
+      #Required if targeting non-default index (filebeat-%{[agent.version]}-%{+yyyy.MM.dd}) such as hpccsystems-%{[fields.log_type]}-%{[agent.version]}-%{+yyyy.MM.dd}, etc.
+      #setup.ilm.enabled: false
+      #setup.template.overwrite: true
+      output.elasticsearch:
+        host: '${NODE_NAME}'
+        hosts: '${ELASTICSEARCH_HOSTS:elasticsearch-master:9200}'
+      #NOTE: Pipeline could be used to provide Log structure and therefore enhance search capabilities of HPCC component log entries
+      #      Pipeline must be manually inserted either via Elastic Search API, or Kibana Pipeline ingest UI.
+      #      See https://github.com/hpcc-systems/HPCC-Platform/blob/master/helm/managed/logging/elastic/README.md
+      #  pipeline: 'hpccpipeline'
+      #  index: "hpccsystems-%{[fields.log_type]}-%{[agent.version]}-%{+yyyy.MM.dd}"
+      #setup.template.name: hpccsystems
+      #setup.template.pattern: hpccsystems-*
+      #setup.template.enabled: true
 
 ##The kibana component can be customized by modifying helm chart values here.
 kibana:

File diff suppressed because it is too large
+ 0 - 92
helm/storage.rst


+ 1 - 1
initfiles/bash/etc/init.d/pid.sh

@@ -174,7 +174,7 @@ check_status() {
 checkSentinelFile() {
     FILEPATH="${runtime}/${compName}"
     if [[ -d ${FILEPATH} ]]; then
-       fileCheckOP=$(find ${FILEPATH} -name "*senti*")
+       fileCheckOP=$(find ${FILEPATH} -maxdepth 1 -name "*senti*")
        [[ ! -z "${fileCheckOP}" ]] && return 1
     fi
     return 0

+ 1 - 1
initfiles/componentfiles/configxml/roxie.xsd.in

@@ -725,7 +725,7 @@
         </xs:appinfo>
       </xs:annotation>
     </xs:attribute>
-    <xs:attribute name="maxHttpConnectionRequests" type="xs:nonNegativeInteger" use="optional" default="10">
+    <xs:attribute name="maxHttpConnectionRequests" type="xs:nonNegativeInteger" use="optional" default="0">
       <xs:annotation>
         <xs:appinfo>
           <tooltip>Max number of query requests per persistent http connection</tooltip>

+ 1 - 1
initfiles/etc/DIR_NAME/environment.xml.in

@@ -87,7 +87,7 @@
                 logQueueLen="512"
                 lowTimeout="10000"
                 maxBlockSize="10000000"
-                maxHttpConnectionRequests="10"
+                maxHttpConnectionRequests="0"
                 maxHttpKeepAliveWait="5000"
                 maxLocalFilesOpen="4000"
                 maxLockAttempts="5"

+ 37 - 0
plugins/fileservices/fileservices.cpp

@@ -62,6 +62,7 @@ static const char * EclDefinition =
 "export integer4 PREFIX_VARIABLE_BIGENDIAN_RECSIZE := -4; // special value for SprayFixed record size \n"
 "export FsDropZone := string; \n"
 "export FsDropZoneRecord := record FsDropZone dropzone; end; \n"
+"export FsLandingZoneRecord := record string name; string path; string hostname; end; \n"
 "export FileServices := SERVICE : time\n"
 "  boolean FileExists(const varstring lfn, boolean physical=false) : c,context,entrypoint='fsFileExists'; \n"
 "  DeleteLogicalFile(const varstring lfn,boolean ifexists=false) : c,action,context,entrypoint='fsDeleteLogicalFile'; \n"
@@ -140,6 +141,7 @@ static const char * EclDefinition =
 "  varstring GetEspURL(const varstring username = '', const varstring userPW = '') : c,once,entrypoint='fsGetEspURL'; \n"
 "  varstring GetDefaultDropZone() : c,once,entrypoint='fsGetDefaultDropZone'; \n"
 "  dataset(FsDropZoneRecord) GetDropZones() : c,context,entrypoint='fsGetDropZones'; \n"
+"  dataset(FsLandingZoneRecord) GetLandingZones() : c,context,entrypoint='fsGetLandingZones'; \n"
 "  integer4 GetExpireDays(const varstring lfn) : c,context,entrypoint='fsGetExpireDays'; \n"
 "  SetExpireDays(const varstring lfn, integer4 expireDays) : c,context,entrypoint='fsSetExpireDays'; \n"
 "  ClearExpireDays(const varstring lfn) : c,context,entrypoint='fsClearExpireDays'; \n"
@@ -3109,6 +3111,41 @@ FILESERVICES_API void FILESERVICES_CALL fsGetDropZones(ICodeContext *ctx, size32
 }
 
 
+FILESERVICES_API void FILESERVICES_CALL fsGetLandingZones(ICodeContext *ctx, size32_t & __lenResult, void * & __result)
+{
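+    // Returns one row per storage plane with category 'lz', as {name, path, hostname}
+    // (illustrative ECL usage: OUTPUT(FileServices.GetLandingZones());)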
+    MemoryBuffer mb;
+    size32_t sz;
+    Owned<IPropertyTree> global = getGlobalConfig();
+    Owned<IPropertyTreeIterator> dropZones = global->getElements("storage/planes[labels='lz']");
+    ForEach(*dropZones)
+    {
+        const IPropertyTree &dropZone = dropZones->query();
+        const char * name = dropZone.queryProp("@name");
+        const char * directory = dropZone.queryProp("@prefix");
+        const char * hostGroup = dropZone.queryProp("@hostGroup");
+        // field "name"
+        sz = strlen(name);
+        mb.append(sz).append(sz,name);
+
+        // field "prefix"
+        sz = strlen(directory);
+        mb.append(sz).append(sz,directory);
+
+        // field "host"
+        const char * host = nullptr;
+        if (hostGroup)
+        {
+            VStringBuffer xpath("storage/hostGroups[@name='%s']", hostGroup);
+            IPropertyTree * match = global->queryPropTree(xpath);
+            if (match)
+                host = match->queryProp("hosts[1]");
+        }
+        sz = host ? strlen(host) : 0;
+        mb.append(sz).append(sz, host);
+    }
+    __lenResult = mb.length();
+    __result = mb.detach();
+}
 
 FILESERVICES_API int FILESERVICES_CALL fsGetExpireDays(ICodeContext * ctx, const char *_lfn)
 {

+ 1 - 0
plugins/fileservices/fileservices.hpp

@@ -178,6 +178,7 @@ FILESERVICES_API void FILESERVICES_CALL fsDfuPlusExec(ICodeContext * ctx,const c
 FILESERVICES_API char * FILESERVICES_CALL fsGetEspURL(const char *username, const char *userPW);
 FILESERVICES_API char * FILESERVICES_CALL fsGetDefaultDropZone();
 FILESERVICES_API void FILESERVICES_CALL fsGetDropZones(ICodeContext *ctx,size32_t & __lenResult,void * & __result);
+FILESERVICES_API void FILESERVICES_CALL fsGetLandingZones(ICodeContext *ctx,size32_t & __lenResult,void * & __result);
 FILESERVICES_API int FILESERVICES_CALL fsGetExpireDays(ICodeContext * ctx, const char *lfn);
 FILESERVICES_API void FILESERVICES_CALL fsSetExpireDays(ICodeContext * ctx, const char *lfn, int expireDays);
 FILESERVICES_API void FILESERVICES_CALL fsClearExpireDays(ICodeContext * ctx, const char *lfn);

+ 1 - 0
plugins/parselib/parselib.cpp

@@ -20,6 +20,7 @@
 #include <string.h>
 #include <ctype.h>
 #include "jlib.hpp"
+#include "jhash.hpp"
 #include "thorparse.hpp"
 #include "parselib.hpp"
 

+ 1 - 0
plugins/unicodelib/unicodelib.cpp

@@ -16,6 +16,7 @@
 ############################################################################## */
 
 #include "jlib.hpp"
+#include "jhash.hpp"
 #include "jsem.hpp"
 
 #include <string.h>

+ 3 - 0
roxie/CMakeLists.txt

@@ -14,6 +14,9 @@
 #    limitations under the License.
 ################################################################################
 HPCC_ADD_SUBDIRECTORY (ccd)
+IF (NOT WIN32)
+  HPCC_ADD_SUBDIRECTORY (ccdcache "PLATFORM")
+ENDIF()
 HPCC_ADD_SUBDIRECTORY (roxie "PLATFORM")
 HPCC_ADD_SUBDIRECTORY (topo "PLATFORM")
 HPCC_ADD_SUBDIRECTORY (roxiemem)

+ 4 - 4
roxie/ccd/CMakeLists.txt

@@ -27,7 +27,8 @@
 project( ccd ) 
 
 set (   SRCS 
-        ccdactivities.cpp 
+        ccdactivities.cpp
+        ../ccdcache/ccdcache.cpp
         ccddali.cpp
         ccdcontext.cpp
         ccddebug.cpp
@@ -68,6 +69,7 @@ include_directories (
          ${HPCC_SOURCE_DIR}/system/mp
          ${HPCC_SOURCE_DIR}/common/workunit
          ${HPCC_SOURCE_DIR}/roxie/udplib
+         ${HPCC_SOURCE_DIR}/roxie/ccdcache
          ${HPCC_SOURCE_DIR}/roxie/roxie
          ${HPCC_SOURCE_DIR}/common/environment
          ${HPCC_SOURCE_DIR}/ecl/hthor
@@ -139,6 +141,4 @@ ENDIF()
 IF (USE_TBBMALLOC AND USE_TBBMALLOC_ROXIE)
    add_dependencies ( ccd tbb )
    target_link_libraries ( ccd libtbbmalloc_proxy libtbbmalloc)
-ENDIF()
-
-
+ENDIF()

+ 21 - 20
roxie/ccd/ccdcontext.cpp

@@ -1924,32 +1924,31 @@ public:
     }
     virtual void getResultDecimal(unsigned tlen, int precision, bool isSigned, void * tgt, const char * stepname, unsigned sequence)
     {
-        if (isSpecialResultSequence(sequence))
+        memset(tgt, 0, tlen);
+        CriticalBlock b(contextCrit);
+        IPropertyTree &ctx = useContext(sequence);
+        if (ctx.hasProp(stepname))
         {
-            MemoryBuffer m;
-            CriticalBlock b(contextCrit);
-            useContext(sequence).getPropBin(stepname, m);
-            if (m.length())
+            if (ctx.isBinary(stepname))
             {
-                assertex(m.length() == tlen);
-                m.read(tlen, tgt);
+                MemoryBuffer m;
+                ctx.getPropBin(stepname, m);
+                if (m.length())
+                {
+                    assertex(m.length() == tlen);
+                    m.read(tlen, tgt);
+                }
             }
             else
-                memset(tgt, 0, tlen);
-        }
-        else
-        {
-            StringBuffer x;
             {
-                CriticalBlock b(contextCrit);
-                useContext(sequence).getProp(stepname, x);
+                const char *val = ctx.queryProp(stepname);
+                Decimal d;
+                d.setCString(val);
+                if (isSigned)
+                    d.getDecimal(tlen, precision, tgt);
+                else
+                    d.getUDecimal(tlen, precision, tgt);
             }
-            Decimal d;
-            d.setString(x.length(), x.str());
-            if (isSigned)
-                d.getDecimal(tlen, precision, tgt);
-            else
-                d.getUDecimal(tlen, precision, tgt);
         }
     }
     virtual __int64 getResultInt(const char * name, unsigned sequence)
@@ -3941,6 +3940,8 @@ public:
 
     virtual bool okToLogStartStopError()
     {
+        if (!actResetLogPeriod)
+            return false;
         // Each query starts with actResetLogState set to LogResetInit
         // State is changed to LogResetOK and a timer is started
         // If same query runs again then time since last logged is checked

+ 100 - 213
roxie/ccd/ccdfile.cpp

@@ -35,6 +35,7 @@
 #include "ccdsnmp.hpp"
 #include "rmtfile.hpp"
 #include "ccdqueue.ipp"
+#include "ccdcache.hpp"
 #if defined(__linux__) || defined(__APPLE__)
 #include <sys/mman.h>
 #endif
@@ -624,57 +625,6 @@ typedef StringArray *StringArrayPtr;
 
 // A circular buffer recording recent disk read operations that can be used to "prewarm" the cache
 
-struct CacheInfoEntry
-{
-    //For convenience the values for PageType match the NodeX enumeration (see noteWarm).
-    //Ensure disk entries sort last so that index nodes take precedence when deduping offsets.
-    enum PageType : unsigned
-    {
-        PageTypeBranch = 0,
-        PageTypeLeaf = 1,
-        PageTypeBlob = 2,
-        PageTypeDisk = 3,
-    };
-
-    union
-    {
-        struct
-        {
-#ifndef _WIN32
-            unsigned type: 2;    // disk or the kind of index node
-            __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
-            unsigned file: 24;   // Up to 4 million files
-#else
-//Windows does not like packing bitfields with different base types - fails the statck assert
-            __uint64 type: 2;    // disk or the kind of index node
-            __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
-            __uint64 file: 24;   // Up to 4 million files
-#endif
-        } b;
-        __uint64 u;
-    };
-
-#ifndef _WIN32
-    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
-#elif _MSC_VER >= 1900
-    //Older versions of the windows compiler complain CacheInfoEntry::b is not a type name
-    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
-#endif
-
-    inline CacheInfoEntry() { u = 0; }
-    inline CacheInfoEntry(unsigned _file, offset_t _pos, PageType pageType)
-    {
-        b.file = _file;
-        b.page = _pos >> pageBits;
-        b.type = pageType;
-    }
-    inline bool operator < ( const CacheInfoEntry &l) const { return u < l.u; }
-    inline bool operator <= ( const CacheInfoEntry &l) const { return u <= l.u; }
-    inline void operator++ () { b.page++; }
-
-    static constexpr unsigned pageBits = 13;  // 8k 'pages'
-};
-
 class CacheReportingBuffer : public CInterfaceOf<ICacheInfoRecorder>
 {
     // A circular buffer recording recent file activity. Note that noteRead() and clear() may be called from multiple threads
@@ -817,6 +767,75 @@ private:
     }
 };
 
+class IndexCacheWarmer : implements ICacheWarmer
+{
+    IRoxieFileCache *cache = nullptr;
+    Owned<ILazyFileIO> localFile;
+    Owned<IKeyIndex> keyIndex;
+    bool keyFailed = false;
+    unsigned fileIdx = (unsigned) -1;
+    unsigned filesProcessed = 0;
+    unsigned pagesPreloaded = 0;
+public:
+    IndexCacheWarmer(IRoxieFileCache *_cache) : cache(_cache) {}
+
+    virtual void startFile(const char *filename) override
+    {
+        // "filename" is the filename that roxie would use if it copied the file locally.  This may not
+        // match the name of the actual file - e.g. if the file is local but in a different location.
+        localFile.setown(cache->lookupLocalFile(filename));
+        if (localFile)
+        {
+            fileIdx = localFile->getFileIdx();
+        }
+        keyFailed = false;
+        filesProcessed++;
+    }
+
+    virtual bool warmBlock(const char *filename, NodeType nodeType, offset_t startOffset, offset_t endOffset) override
+    {
+        if (nodeType != NodeNone && !keyFailed && localFile && !keyIndex)
+        {
+            //Pass false for isTLK - it will be initialised from the index header
+            keyIndex.setown(createKeyIndex(filename, localFile->getCrc(), *localFile.get(), fileIdx, false));
+            if (!keyIndex)
+                keyFailed = true;
+        }
+        if (nodeType != NodeNone && keyIndex)
+        {
+            // Round startOffset up to nearest multiple of index node size
+            unsigned nodeSize = keyIndex->getNodeSize();
+            startOffset = ((startOffset+nodeSize-1)/nodeSize)*nodeSize;
+            do
+            {
+                if (traceLevel > 8)
+                    DBGLOG("prewarming index page %u %s %" I64F "x-%" I64F "x", (int) nodeType, filename, startOffset, endOffset);
+                bool loaded = keyIndex->prewarmPage(startOffset, nodeType);
+                if (!loaded)
+                    break;
+                pagesPreloaded++;
+                startOffset += nodeSize;
+            }
+            while (startOffset < endOffset);
+        }
+        else if (fileIdx != (unsigned) -1)
+            cache->noteRead(fileIdx, startOffset, (endOffset-1) - startOffset);  // Ensure pages we prewarm are recorded in our cache tracker
+        return true;
+    }
+
+    virtual void endFile() override
+    {
+        localFile.clear();
+        keyIndex.clear();
+    }
+
+    virtual void report() override
+    {
+        if (traceLevel)
+            DBGLOG("Processed %u files and preloaded %u index nodes", filesProcessed, pagesPreloaded);
+    }
+};
+
 class CRoxieFileCache : implements IRoxieFileCache, implements ICopyFileProgress, public CInterface
 {
     friend class CcdFileTest;
@@ -1151,7 +1170,7 @@ class CRoxieFileCache : implements IRoxieFileCache, implements ICopyFileProgress
             bool hardLinkCreated = false;
             unsigned start = msTick();
 #ifdef _DEBUG
-            if (topology->getPropBool("@simulateSlowCopies"))
+            if (topology && topology->getPropBool("@simulateSlowCopies"))  // topology is null when running unit tests
             {
                 DBGLOG("Simulating a slow copy");
                 Sleep(10*1000);
@@ -1772,7 +1791,7 @@ public:
         return ret.getLink();
     }
 
-    ILazyFileIO *lookupLocalFile(const char *filename)
+    virtual ILazyFileIO *lookupLocalFile(const char *filename)
     {
         try
         {
@@ -1823,6 +1842,8 @@ public:
 
     virtual void loadSavedOsCacheInfo() override
     {
+        if (!topology->getPropBool("@warmOsCache", true))
+            return;
         Owned<const ITopologyServer> topology = getTopology();
         for (unsigned channel : topology->queryChannels())
             doLoadSavedOsCacheInfo(channel);
@@ -1839,16 +1860,35 @@ public:
         if (!dllserver_root)
             return;
 #endif
+        unsigned cacheWarmTraceLevel = topology->getPropInt("@cacheWarmTraceLevel", traceLevel);
         VStringBuffer cacheFileName("%s/%s/cacheInfo.%d", dllserver_root, roxieName.str(), channel);
         StringBuffer cacheInfo;
         try
         {
             if (checkFileExists(cacheFileName))
             {
+#ifndef _WIN32
+                StringBuffer output;
+                VStringBuffer command("ccdcache %s -t %u", cacheFileName.str(), cacheWarmTraceLevel);
+                unsigned retcode = runExternalCommand(nullptr, output, output, command, nullptr);
+                if (output.length())
+                {
+                    StringArray outputLines;
+                    outputLines.appendList(output, "\n");
+                    ForEachItemIn(idx, outputLines)
+                    {
+                        const char *line = outputLines.item(idx);
+                        if (line && *line)
+                            DBGLOG("ccdcache: %s", line);
+                    }
+                }
+                if (retcode)
+                    DBGLOG("ccdcache failed with exit code %u", retcode);
+#endif
                 cacheInfo.loadFile(cacheFileName, false);
-                warmOsCache(cacheInfo);
                 if (traceLevel)
-                    DBGLOG("Loaded cache information from %s for channel %d", cacheFileName.str(), channel);
+                    DBGLOG("Loading cache information from %s for channel %d", cacheFileName.str(), channel);
+                warmOsCache(cacheInfo);
             }
         }
         catch(IException *E)
@@ -1863,163 +1903,10 @@ public:
     {
         if (!cacheInfo)
             return;
-#ifndef _WIN32
-        size_t os_page_size = getpagesize();
-#endif
-        char t = 0;
-        unsigned touched = 0;
-        unsigned preloaded = 0;
-        Owned<const ITopologyServer> topology = getTopology();
-        while (*cacheInfo)
-        {
-            // We are parsing lines that look like:
-            // <channel>|<filename>|<pagelist>
-            //
-            // Where pagelist is a space-separated list of page numbers or (inclusive) ranges.
-            // A page number or range prefixed by a * means that the page(s) was found in the jhtree cache.
-            //
-            // For example,
-            // 1|/var/lib/HPCCSystems/hpcc-data/unknown/regress/multi/dg_index_evens._1_of_3|*0 3-4
-            // Pages are always recorded and specified as 8192 bytes (unless pagebits ever changes).
-
-            unsigned fileChannel = strtoul(cacheInfo, (char **) &cacheInfo, 10);
-            if (*cacheInfo != '|')
-                break;
-            if (!topology->implementsChannel(fileChannel))
-            {
-                const char *eol = strchr(cacheInfo, '\n');
-                if (!eol)
-                    break;
-                cacheInfo = eol+1;
-                continue;
-            }
-            cacheInfo++;
-            const char *endName = strchr(cacheInfo, '|');
-            assert(endName);
-            if (!endName)
-                break;
-            StringBuffer fileName(endName-cacheInfo, cacheInfo);
-            Owned<IKeyIndex> keyIndex;
-            bool keyFailed = false;
-            unsigned fileIdx = (unsigned) -1;
-#ifndef _WIN32
-            char *file_mmap = nullptr;
-            int fd = open(fileName, 0);
-            struct stat file_stat;
-            if (fd != -1)
-            {
-                fstat(fd, &file_stat);
-                file_mmap = (char *) mmap((void *)0, file_stat.st_size, PROT_READ, MAP_SHARED, fd, 0);
-                if (file_mmap == MAP_FAILED)
-                {
-                    DBGLOG("Failed to map file %s to pre-warm cache (error %d)", fileName.str(), errno);
-                    file_mmap = nullptr;
-                }
-            }
-            else if (traceLevel)
-            {
-                DBGLOG("Failed to open file %s to pre-warm cache (error %d)", fileName.str(), errno);
-            }
-#endif
-            // "fileName" is the filename that roxie would use if it copied the file locally.  This may not
-            // match the name of the actual file - e.g. if the file is local but in a different location.
-            Owned<ILazyFileIO> localFile = lookupLocalFile(fileName);
-            if (localFile)
-            {
-                fileIdx = localFile->getFileIdx();
-            }
-            cacheInfo = endName+1;  // Skip the |
-            while (*cacheInfo==' ')
-                cacheInfo++;
-            for (;;)
-            {
-                bool inNodeCache = (*cacheInfo=='*');
-                NodeType nodeType = NodeNone;
-                if (inNodeCache)
-                {
-                    cacheInfo++;
-                    switch (*cacheInfo)
-                    {
-                    case 'R': nodeType = NodeBranch; break;
-                    case 'L': nodeType = NodeLeaf; break;
-                    case 'B': nodeType = NodeBlob; break;
-                    default:
-                        throwUnexpectedX("Unknown node type");
-                    }
-                    cacheInfo++;
-                }
-                __uint64 startPage = readPage(cacheInfo);
-                __uint64 endPage;
-                if (*cacheInfo=='-')
-                {
-                    cacheInfo++;
-                    endPage = readPage(cacheInfo);
-                }
-                else
-                    endPage = startPage;
-                if (traceLevel > 8)
-                    DBGLOG("Touching %s %" I64F "x-%" I64F "x", fileName.str(), startPage, endPage);
-                offset_t startOffset = startPage << CacheInfoEntry::pageBits;
-                offset_t endOffset = (endPage+1) << CacheInfoEntry::pageBits;
-                if (inNodeCache && !keyFailed && localFile && !keyIndex)
-                {
-                    //Pass false for isTLK - it will be initialised from the index header
-                    keyIndex.setown(createKeyIndex(fileName, localFile->getCrc(), *localFile.get(), fileIdx, false));
-                    if (!keyIndex)
-                        keyFailed = true;
-                }
-                if (inNodeCache && keyIndex)
-                {
-                    // Round startOffset up to nearest multiple of index node size
-                    unsigned nodeSize = keyIndex->getNodeSize();
-                    startOffset = ((startOffset+nodeSize-1)/nodeSize)*nodeSize;
-                    do
-                    {
-                        bool loaded = keyIndex->prewarmPage(startOffset, nodeType);
-                        if (!loaded)
-                            break;
-                        preloaded++;
-                        startOffset += nodeSize;
-                    }
-                    while (startOffset < endOffset);
-                }
-#ifndef _WIN32
-                else if (file_mmap)
-                {
-                    if (fileIdx != (unsigned) -1)
-                        noteRead(fileIdx, startOffset, (endOffset-1) - startOffset);  // Ensure pages we prewarm are recorded in our cache tracker
-                    do
-                    {
-                        if (startOffset >= (offset_t) file_stat.st_size)
-                            break;    // Let's not core if the file has changed size since we recorded the info...
-                        t += file_mmap[startOffset];  // NOTE - t reported below so it cannot be optimized out
-                        touched++;
-                        startOffset += os_page_size;
-                    }
-                    while (startOffset < endOffset);
-                }
-#endif
-                if (*cacheInfo != ' ')
-                    break;
-                cacheInfo++;
-            }
-#ifndef _WIN32
-            if (file_mmap)
-                munmap(file_mmap, file_stat.st_size);
-            if (fd != -1)
-                close(fd);
-#endif
-            if (*cacheInfo != '\n')
-                break;
-            cacheInfo++;
-        }
-        assert(!*cacheInfo);
-        if (*cacheInfo)
-        {
-            DBGLOG("WARNING: Unrecognized cacheInfo format at %.20s", cacheInfo);
-        }
-        if (traceLevel)
-            DBGLOG("Touched %d pages, preloaded %d index nodes, result %d", touched, preloaded, t);  // We report t to make sure that compiler doesn't decide to optimize it away entirely
+        IndexCacheWarmer warmer(this);
+        if (!::warmOsCache(cacheInfo, &warmer))
+            DBGLOG("WARNING: Unrecognized cacheInfo format");
+        warmer.report();
     }
 
     virtual void clearOsCache() override
@@ -2305,7 +2192,7 @@ public:
     {
         if (part > numParts || part == 0)
         {
-            throw MakeStringException(ROXIE_FILE_ERROR, "Internal error - requesting base for non-existant file part %d (valid are 1-%d)", part, numParts);
+            throw MakeStringException(ROXIE_FILE_ERROR, "Internal error - requesting base for non-existent file part %d (valid are 1-%d)", part, numParts);
         }
         return map[part-1].base;
     }

+ 1 - 0
roxie/ccd/ccdfile.hpp

@@ -75,6 +75,7 @@ interface IRoxieFileCache : extends IInterface
     virtual void loadSavedOsCacheInfo() = 0;
     virtual void noteRead(unsigned fileIdx, offset_t pos, unsigned len) = 0;
     virtual void startCacheReporter() = 0;
+    virtual ILazyFileIO *lookupLocalFile(const char *filename) = 0;
 };
 
 interface IDiffFileInfoCache : extends IInterface

+ 9 - 6
roxie/ccd/ccdlistener.cpp

@@ -965,7 +965,7 @@ public:
         worker->threadmain();
     }
 
-    virtual void noteQuery(IHpccProtocolMsgContext *msgctx, const char *peer, bool failed, unsigned bytesOut, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentsDuplicates, unsigned agentsResends, bool continuationNeeded)
+    virtual void noteQuery(IHpccProtocolMsgContext *msgctx, const char *peer, bool failed, unsigned bytesOut, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentsDuplicates, unsigned agentsResends, bool continuationNeeded, unsigned requestArraySize)
     {
     }
 
@@ -1448,7 +1448,7 @@ public:
             return;
         uid.set(id);
         ensureContextLogger();
-        if (!isEmptyString(logctx->queryGlobalId())) //globalId wins
+        if (!global && !isEmptyString(logctx->queryGlobalId())) //globalId wins
             return;
         StringBuffer s;
         ep.getIpText(s).appendf(":%u{%s}", ep.port, uid.str()); //keep no matter what for existing log parsers
@@ -1590,7 +1590,7 @@ public:
             combinedQueryStats.noteQuery(failed, elapsedTime);
         }
     }
-    void noteQuery(const char *peer, bool failed, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentsDuplicates, unsigned agentsResends, unsigned bytesOut, bool continuationNeeded)
+    void noteQuery(const char *peer, bool failed, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentsDuplicates, unsigned agentsResends, unsigned bytesOut, bool continuationNeeded, unsigned requestArraySize)
     {
         noteQueryStats(failed, elapsed);
         if (queryFactory)
@@ -1617,7 +1617,10 @@ public:
                 }
                 if (txIds.length())
                     txIds.insert(0, '[').append(']');
-                logctx->CTXLOG("COMPLETE: %s %s%s from %s complete in %u msecs memory=%u Mb priority=%d agentsreply=%u duplicatePackets=%u resentPackets=%u resultsize=%u continue=%d%s", queryName.get(), uid.get(), txIds.str(), peer, elapsed, memused, getQueryPriority(), agentsReplyLen, agentsDuplicates, agentsResends, bytesOut, continuationNeeded, s.str());
+                if (requestArraySize > 1)
+                    logctx->CTXLOG("COMPLETE: %s(x%u) %s%s from %s complete in %u msecs memory=%u Mb priority=%d agentsreply=%u duplicatePackets=%u resentPackets=%u resultsize=%u continue=%d%s", queryName.get(), requestArraySize, uid.get(), txIds.str(), peer, elapsed, memused, getQueryPriority(), agentsReplyLen, agentsDuplicates, agentsResends, bytesOut, continuationNeeded, s.str());
+                else
+                    logctx->CTXLOG("COMPLETE: %s %s%s from %s complete in %u msecs memory=%u Mb priority=%d agentsreply=%u duplicatePackets=%u resentPackets=%u resultsize=%u continue=%d%s", queryName.get(), uid.get(), txIds.str(), peer, elapsed, memused, getQueryPriority(), agentsReplyLen, agentsDuplicates, agentsResends, bytesOut, continuationNeeded, s.str());
             }
         }
     }
@@ -1888,10 +1891,10 @@ public:
         roxieMsgCtx->ensureDebugCommandHandler().doDebugCommand(msg, &roxieMsgCtx->ensureDebuggerContext(uid), out);
     }
 
-    virtual void noteQuery(IHpccProtocolMsgContext *msgctx, const char *peer, bool failed, unsigned bytesOut, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentsDuplicates, unsigned agentsResends, bool continuationNeeded)
+    virtual void noteQuery(IHpccProtocolMsgContext *msgctx, const char *peer, bool failed, unsigned bytesOut, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentsDuplicates, unsigned agentsResends, bool continuationNeeded, unsigned requestArraySize)
     {
         RoxieProtocolMsgContext *roxieMsgCtx = checkGetRoxieMsgContext(msgctx);
-        roxieMsgCtx->noteQuery(peer, failed, elapsed, memused, agentsReplyLen, agentsDuplicates, agentsResends, bytesOut, continuationNeeded);
+        roxieMsgCtx->noteQuery(peer, failed, elapsed, memused, agentsReplyLen, agentsDuplicates, agentsResends, bytesOut, continuationNeeded, requestArraySize);
     }
 
 };

+ 30 - 7
roxie/ccd/ccdprotocol.cpp

@@ -56,7 +56,7 @@ public:
         defaultXmlReadFlags = ctx.ctxGetPropBool("@defaultStripLeadingWhitespace", true) ? ptr_ignoreWhiteSpace : ptr_none;
         trapTooManyActiveQueries = ctx.ctxGetPropBool("@trapTooManyActiveQueries", true);
         numRequestArrayThreads = ctx.ctxGetPropInt("@requestArrayThreads", 5);
-        maxHttpConnectionRequests = ctx.ctxGetPropInt("@maxHttpConnectionRequests", 10);
+        maxHttpConnectionRequests = ctx.ctxGetPropInt("@maxHttpConnectionRequests", 0);
         maxHttpKeepAliveWait = ctx.ctxGetPropInt("@maxHttpKeepAliveWait", 5000); // In milliseconds
     }
     IHpccProtocolListener *createListener(const char *protocol, IHpccProtocolMsgSink *sink, unsigned port, unsigned listenQueue, const char *config, const char *certFile=nullptr, const char *keyFile=nullptr, const char *passPhrase=nullptr)
@@ -69,7 +69,7 @@ public:
     PTreeReaderOptions defaultXmlReadFlags;
     unsigned maxBlockSize;
     unsigned numRequestArrayThreads;
-    unsigned maxHttpConnectionRequests = 10;
+    unsigned maxHttpConnectionRequests = 0;
     unsigned maxHttpKeepAliveWait = 5000;
     bool trapTooManyActiveQueries;
 };
@@ -1732,12 +1732,12 @@ private:
         unsigned memused = 0;
         IpAddress peer;
         bool continuationNeeded = false;
+        bool resetQstart = false;
         bool isStatus = false;
         unsigned remainingHttpConnectionRequests = global->maxHttpConnectionRequests ? global->maxHttpConnectionRequests : 1;
         unsigned readWait = WAIT_FOREVER;
 
         Owned<IHpccProtocolMsgContext> msgctx = sink->createMsgContext(startTime);
-        IContextLogger &logctx = *msgctx->queryLogContext();
 
 readAnother:
         unsigned agentsReplyLen = 0;
@@ -1762,15 +1762,32 @@ readAnother:
                     return;
                 }
             }
-            if (continuationNeeded)
+            if (resetQstart)
             {
+                resetQstart = false;
                 qstart = msTick();
                 time(&startTime);
+                msgctx.setown(sink->createMsgContext(startTime));
             }
         }
         catch (IException * E)
         {
-            if (traceLevel > 0)
+            bool expectedError = false;
+            if (resetQstart) //persistent connection - initial request has already been processed
+            {
+                switch (E->errorCode())
+                {
+                    //closing of persistent socket is not an error
+                    case JSOCKERR_not_opened:
+                    case JSOCKERR_broken_pipe:
+                    case JSOCKERR_timeout_expired:
+                    case JSOCKERR_graceful_close:
+                        expectedError = true;
+                    default:
+                        break;
+                }
+            }
+            if (traceLevel > 0 && !expectedError)
             {
                 StringBuffer b;
                 IERRLOG("Error reading query from socket: %s", E->errorMessage(b).str());
@@ -1780,6 +1797,7 @@ readAnother:
             return;
         }
 
+        IContextLogger &logctx = *msgctx->queryLogContext();
         bool isHTTP = httpHelper.isHttp();
         if (isHTTP)
         {
@@ -1817,6 +1835,7 @@ readAnother:
         bool isBlind = false;
         bool isDebug = false;
         unsigned protocolFlags = isHTTP ? 0 : HPCC_PROTOCOL_NATIVE;
+        unsigned requestArraySize = 0; //for logging, considering all the ways requests can be counted this name seems least confusing
 
         Owned<IPropertyTree> queryPT;
         StringBuffer sanitizedText;
@@ -1866,7 +1885,7 @@ readAnother:
                 Owned<IHpccProtocolResponse> protocol = createProtocolResponse(queryPT->queryName(), client, httpHelper, logctx, protocolFlags | HPCC_PROTOCOL_CONTROL, global->defaultXmlReadFlags);
                 sink->onControlMsg(msgctx, queryPT, protocol);
                 protocol->finalize(0);
-                if (streq(queryName, "lock") || streq(queryName, "childlock"))
+                if (streq(queryName, "lock") || streq(queryName, "childlock")) //don't reset qstart, lock time should be included
                     goto readAnother;
             }
             else if (isStatus)
@@ -1980,6 +1999,7 @@ readAnother:
                                         fixedreq->addPropTree(iter->query().queryName(), LINK(&iter->query()));
                                     }
                                     requestArray.append(*fixedreq);
+                                    requestArraySize++;
                                 }
                             }
                             else
@@ -1991,6 +2011,7 @@ readAnother:
                                     fixedreq->addPropTree(iter->query().queryName(), LINK(&iter->query()));
                                 }
                                 requestArray.append(*fixedreq);
+                                requestArraySize = 1;
 
                                 msgctx->setIntercept(queryPT->getPropBool("@log", false));
                                 msgctx->setTraceLevel(queryPT->getPropInt("@traceLevel", logctx.queryTraceLevel()));
@@ -2108,10 +2129,11 @@ readAnother:
         }
         unsigned bytesOut = client? client->bytesOut() : 0;
         unsigned elapsed = msTick() - qstart;
-        sink->noteQuery(msgctx.get(), peerStr, failed, bytesOut, elapsed,  memused, agentsReplyLen, agentsDuplicates, agentsResends, continuationNeeded);
+        sink->noteQuery(msgctx.get(), peerStr, failed, bytesOut, elapsed,  memused, agentsReplyLen, agentsDuplicates, agentsResends, continuationNeeded, requestArraySize);
         if (continuationNeeded)
         {
             rawText.clear();
+            resetQstart = true;
             goto readAnother;
         }
         else
@@ -2132,6 +2154,7 @@ readAnother:
                 if (isHTTP && --remainingHttpConnectionRequests > 0)
                 {
                     readWait = global->maxHttpKeepAliveWait;
+                    resetQstart = true;
                     goto readAnother;
                 }
 

+ 1 - 1
roxie/ccd/ccdserver.cpp

@@ -4900,7 +4900,7 @@ public:
             }
             else
             {
-                if (!anyActivity && !localAgent && lastActivity-msTick() >= timeout)
+                if (!anyActivity && !localAgent && (msTick()-lastActivity >= timeout))
                 {
                     activity.queryLogCtx().CTXLOG("Input has stalled for %u ms - retry required?", timeout);
                     retryPending();

+ 1 - 1
roxie/ccd/hpccprotocol.hpp

@@ -98,7 +98,7 @@ interface IHpccProtocolMsgSink : extends IInterface
     virtual IHpccProtocolMsgContext *createMsgContext(time_t startTime) = 0;
     virtual StringArray &getTargetNames(StringArray &targets) = 0;
 
-    virtual void noteQuery(IHpccProtocolMsgContext *msgctx, const char *peer, bool failed, unsigned bytesOut, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentDuplicates, unsigned agentResends, bool continuationNeeded) = 0;
+    virtual void noteQuery(IHpccProtocolMsgContext *msgctx, const char *peer, bool failed, unsigned bytesOut, unsigned elapsed, unsigned memused, unsigned agentsReplyLen, unsigned agentDuplicates, unsigned agentResends, bool continuationNeeded, unsigned requestArraySize) = 0;
     virtual void onQueryMsg(IHpccProtocolMsgContext *msgctx, IPropertyTree *msg, IHpccProtocolResponse *protocol, unsigned flags, PTreeReaderOptions readFlags, const char *target, unsigned idx, unsigned &memused, unsigned &agentReplyLen, unsigned &agentDuplicates, unsigned &agentResends) = 0;
 };
 

+ 73 - 0
roxie/ccdcache/CMakeLists.txt

@@ -0,0 +1,73 @@
+################################################################################
+#    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+################################################################################
+
+
+project( ccdcache ) 
+
+set (   SRCS 
+        ccdcache.cpp
+    )
+
+include_directories ( 
+         .
+         ${HPCC_SOURCE_DIR}/roxie/ccd
+         ${HPCC_SOURCE_DIR}/fs/dafsclient
+         ${HPCC_SOURCE_DIR}/system/jhtree
+         ${HPCC_SOURCE_DIR}/system/mp
+         ${HPCC_SOURCE_DIR}/common/workunit
+         ${HPCC_SOURCE_DIR}/roxie/udplib
+         ${HPCC_SOURCE_DIR}/roxie/roxie
+         ${HPCC_SOURCE_DIR}/common/environment
+         ${HPCC_SOURCE_DIR}/ecl/hthor
+         ${HPCC_SOURCE_DIR}/ecl/schedulectrl
+         ${HPCC_SOURCE_DIR}/rtl/nbcd
+         ${HPCC_SOURCE_DIR}/common/deftype
+         ${HPCC_SOURCE_DIR}/system/include
+         ${HPCC_SOURCE_DIR}/dali/base
+         ${HPCC_SOURCE_DIR}/dali/dfu
+         ${HPCC_SOURCE_DIR}/roxie/roxiemem
+         ${HPCC_SOURCE_DIR}/common/dllserver
+         ${HPCC_SOURCE_DIR}/system/jlib
+         ${HPCC_SOURCE_DIR}/common/thorhelper
+         ${HPCC_SOURCE_DIR}/rtl/eclrtl
+         ${HPCC_SOURCE_DIR}/rtl/include
+         ${HPCC_SOURCE_DIR}/testing/unittests
+         ${HPCC_SOURCE_DIR}/dali/ft
+         ${HPCC_SOURCE_DIR}/system/security/shared
+         ${HPCC_SOURCE_DIR}/system/security/securesocket
+         ${HPCC_SOURCE_DIR}/system/libbase58
+    )
+
+if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG)
+  SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-switch -Wno-unused-parameter -Werror -Wno-delete-non-virtual-dtor -Wno-overloaded-virtual")
+  if (CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.0.0")
+    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess")
+  endif()
+endif()
+
+ADD_DEFINITIONS( -D_USRDLL -D_STANDALONE_CCDCACHE )
+
+if (CMAKE_COMPILER_IS_CLANGXX)
+  SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-switch-enum -Wno-format-security -Werror=reorder")
+endif()
+
+HPCC_ADD_EXECUTABLE ( ccdcache ${SRCS} )
+install ( TARGETS ccdcache RUNTIME DESTINATION ${EXEC_DIR} )
+
+target_link_libraries ( ccdcache
+         jlib
+    )
+

+ 323 - 0
roxie/ccdcache/ccdcache.cpp

@@ -0,0 +1,323 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#include "jlib.hpp"
+#include "jfile.hpp"
+#include "jhtree.hpp"
+#include "ctfile.hpp"
+#include "ccdfile.hpp"
+#include "ccdcache.hpp"
+
+#ifdef _STANDALONE_CCDCACHE
+#if defined(__linux__) || defined(__APPLE__)
+#include <sys/mman.h>
+#endif
+#include <setjmp.h>
+#include <signal.h>
+#endif
+
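+// Parse a hexadecimal page number from the cacheInfo text, advancing the caller's pointer past the digits consumed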
+static unsigned __int64 readPage(const char * &_t)
+{
+    const char *t = _t;
+    unsigned __int64 v = 0;
+    for (;;)
+    {
+        char c = *t;
+        if ((c >= '0') && (c <= '9'))
+            v = v * 16 + (c-'0');
+        else if ((c >= 'a') && (c <= 'f'))
+            v = v * 16 + (c-'a'+10);
+        else if ((c >= 'A') && (c <= 'F'))
+            v = v * 16 + (c-'A'+10);
+        else
+            break;
+        t++;
+    }
+    _t = t;
+    return v;
+}
+
+// Note that warmOsCache is called twice for each cacheInfo file - once via a separate process to touch pages into the Linux page cache,
+// and once within the Roxie process to preload the index cache and initialize the cache info structure for future cache reports.
+
+bool warmOsCache(const char *cacheInfo, ICacheWarmer *callback)
+{
+    if (!cacheInfo)
+        return true;
+    while (*cacheInfo)
+    {
+        // We are parsing lines that look like:
+        // <channel>|<filename>|<pagelist>
+        //
+        // Where pagelist is a space-separated list of page numbers or (inclusive) ranges.
+        // A page number or range prefixed by a * means that the page(s) was found in the jhtree cache.
+        //
+        // For example,
+        // 1|/var/lib/HPCCSystems/hpcc-data/unknown/regress/multi/dg_index_evens._1_of_3|*0 3-4
+        // Pages are always recorded and specified as 8192 bytes (unless pagebits ever changes).
+
+        strtoul(cacheInfo, (char **) &cacheInfo, 10);  // Skip fileChannel - we don't care
+        if (*cacheInfo != '|')
+            break;
+        cacheInfo++;
+        const char *endName = strchr(cacheInfo, '|');
+        assert(endName);
+        if (!endName)
+            break;
+        StringBuffer fileName(endName-cacheInfo, cacheInfo);
+        callback->startFile(fileName.str());
+        cacheInfo = endName+1;  // Skip the |
+        while (*cacheInfo==' ')
+            cacheInfo++;
+        for (;;)
+        {
+            bool inNodeCache = (*cacheInfo=='*');
+            NodeType nodeType = NodeNone;
+            if (inNodeCache)
+            {
+                cacheInfo++;
+                switch (*cacheInfo)
+                {
+                case 'R': nodeType = NodeBranch; break;
+                case 'L': nodeType = NodeLeaf; break;
+                case 'B': nodeType = NodeBlob; break;
+                default:
+                    throwUnexpectedX("Unknown node type");
+                }
+                cacheInfo++;
+            }
+            __uint64 startPage = readPage(cacheInfo);
+            __uint64 endPage;
+            if (*cacheInfo=='-')
+            {
+                cacheInfo++;
+                endPage = readPage(cacheInfo);
+            }
+            else
+                endPage = startPage;
+            offset_t startOffset = startPage << CacheInfoEntry::pageBits;
+            offset_t endOffset = (endPage+1) << CacheInfoEntry::pageBits;
+            if (!callback->warmBlock(fileName.str(), nodeType, startOffset, endOffset))
+            {
+                while (*cacheInfo && *cacheInfo != '\n')
+                    cacheInfo++;
+                break;
+            }
+            if (*cacheInfo != ' ')
+                break;
+            cacheInfo++;
+        }
+        callback->endFile();
+        if (*cacheInfo != '\n')
+            break;
+        cacheInfo++;
+    }
+    assert(!*cacheInfo);
+    return(*cacheInfo == '\0');
+}
+
+#ifdef _STANDALONE_CCDCACHE
+// See example code at https://github.com/sublimehq/mmap-example/blob/master/read_mmap.cc
+
+thread_local volatile bool sigbus_jmp_set;
+thread_local sigjmp_buf sigbus_jmp_buf;
+
+static void handle_sigbus(int c)
+{
+    // Only handle the signal if the jump point is set on this thread
+    if (sigbus_jmp_set)
+    {
+        sigbus_jmp_set = false;
+
+        // siglongjmp out of the signal handler, returning the signal
+        siglongjmp(sigbus_jmp_buf, c);
+    }
+}
+
+static void install_signal_handlers()
+{
+    // Install signal handler for SIGBUS
+    struct sigaction act;
+    act.sa_handler = &handle_sigbus;
+
+    // SA_NODEFER is required due to siglongjmp
+    act.sa_flags = SA_NODEFER;
+    sigemptyset(&act.sa_mask); // Don't block any signals
+
+    // Connect the signal
+    sigaction(SIGBUS, &act, nullptr);
+}
+
+static bool testErrors = false;
+static bool includeInCacheIndexes = false;
+static size_t os_page_size = getpagesize();
+
+class StandaloneCacheWarmer : implements ICacheWarmer
+{
+    unsigned traceLevel;
+    unsigned filesTouched = 0;
+    unsigned pagesTouched = 0;
+    char *file_mmap = nullptr;
+    int fd = -1;
+    struct stat file_stat;
+    char dummy = 0;
+
+    void warmRange(offset_t startOffset, offset_t endOffset)
+    {
+        do
+        {
+            if (startOffset >= (offset_t) file_stat.st_size)
+                break;    // Let's not core if the file has changed size since we recorded the info...
+            dummy += file_mmap[startOffset];
+            if (testErrors)
+                raise(SIGBUS);
+            pagesTouched++;
+            startOffset += os_page_size;
+        }
+        while (startOffset < endOffset);
+    }
+public:
+    StandaloneCacheWarmer(unsigned _traceLevel) : traceLevel(_traceLevel) {}
+
+    virtual void startFile(const char *filename) override
+    {
+        file_mmap = nullptr;
+        fd = open(filename, 0);
+        if (fd != -1)
+        {
+            fstat(fd, &file_stat);
+            file_mmap = (char *) mmap((void *)0, file_stat.st_size, PROT_READ, MAP_SHARED, fd, 0);
+            if (file_mmap == MAP_FAILED)
+            {
+                printf("Failed to map file %s to pre-warm cache (error %d)\n", filename, errno);
+                file_mmap = nullptr;
+            }
+            else
+                filesTouched++;
+        }
+        else if (traceLevel)
+        {
+            printf("Failed to open file %s to pre-warm cache (error %d)\n", filename, errno);
+        }
+    }
+
+    virtual bool warmBlock(const char *filename, NodeType nodeType, offset_t startOffset, offset_t endOffset) override
+    {
+        if (!includeInCacheIndexes && nodeType != NodeNone)
+            return true;
+        if (traceLevel > 8)
+            printf("Touching %s %" I64F "x-%" I64F "x\n", filename, startOffset, endOffset);
+        if (file_mmap)
+        {
+            sigbus_jmp_set = true;
+            if (sigsetjmp(sigbus_jmp_buf, 0) == 0)
+            {
+                warmRange(startOffset, endOffset);
+            }
+            else
+            {
+                if (traceLevel)
+                    printf("SIGBUF caught while trying to touch file %s at offset %" I64F "x\n", filename, startOffset);
+                sigbus_jmp_set = false;
+                return false;
+            }
+            sigbus_jmp_set = false;
+            return true;
+        }
+        else
+            return false;
+    }
+
+    virtual void endFile() override
+    {
+        if (file_mmap)
+            munmap(file_mmap, file_stat.st_size);
+        if (fd != -1)
+            close(fd);
+        fd = -1;
+        file_mmap = nullptr;
+    }
+
+    virtual void report() override
+    {
+        if (traceLevel)
+            printf("Touched %u pages from %u files (dummyval %u)\n", pagesTouched, filesTouched, dummy);  // We report dummy to make sure that compiler doesn't decide to optimize it away entirely
+    }
+};
+
+static void usage()
+{
+    printf("Usage: ccdcache <options> filename\n");
+    printf("Options:\n");
+    printf("  -t  traceLevel\n");
+    printf("  -i  Include in-cache index files too\n");
+    exit(2);
+}
+
+int main(int argc, const char **argv)
+{
+    if (argc < 2)
+        usage();
+    int arg = 1;
+    const char *cacheFileName = nullptr;
+    unsigned traceLevel = 1;
+    while (arg < argc)
+    {
+        if (streq(argv[arg], "-t") || streq(argv[arg], "--traceLevel"))
+        {
+            arg++;
+            if (arg == argc)
+                usage();
+            traceLevel = atoi(argv[arg]);
+        }
+        else if (streq(argv[arg], "-e") || streq(argv[arg], "--testErrors"))
+        {
+            testErrors = true;
+        }
+        else if (streq(argv[arg], "-i") || streq(argv[arg], "--includecachedindexes"))
+        {
+            includeInCacheIndexes = true;
+        }
+        else if (*(argv[arg]) == '-' || cacheFileName != nullptr)
+            usage();
+        else
+            cacheFileName = argv[arg];
+        arg++;
+    }
+    if (!cacheFileName)
+        usage();
+    StringBuffer cacheInfo;
+    install_signal_handlers();
+    StandaloneCacheWarmer warmer(traceLevel);
+    try
+    {
+        if (checkFileExists(cacheFileName))
+        {
+            if (traceLevel)
+                printf("Loading cache information from %s\n", cacheFileName);
+            cacheInfo.loadFile(cacheFileName, false);
+            if (!warmOsCache(cacheInfo, &warmer))
+                printf("WARNING: Unrecognized cacheInfo format in file %s\n", cacheFileName);
+            warmer.report();
+        }
+    }
+    catch(IException *E)
+    {
+        EXCLOG(E);
+        E->Release();
+    }
+}
+#endif
+
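
The warmer above relies on a SIGBUS guard (sigbus_jmp_set, sigbus_jmp_buf and install_signal_handlers()) defined earlier in ccdcache.cpp and not shown in this hunk; mmap'ed reads raise SIGBUS rather than returning an error if the file shrinks underneath us. A minimal sketch of that guard pattern, with simplified and partly hypothetical handler details:

    #include <setjmp.h>
    #include <signal.h>

    static sigjmp_buf sigbus_jmp_buf;
    static volatile sig_atomic_t sigbus_jmp_set = 0;

    static void sigbus_handler(int sig)
    {
        if (sigbus_jmp_set)
            siglongjmp(sigbus_jmp_buf, 1);  // resume at the sigsetjmp() in warmBlock, returning non-zero
        signal(sig, SIG_DFL);               // not in a guarded region: restore default disposition and re-raise
        raise(sig);
    }

    static void install_signal_handlers()
    {
        struct sigaction act = {};
        act.sa_handler = sigbus_handler;
        act.sa_flags = SA_NODEFER;          // leave SIGBUS unblocked in the handler, since sigsetjmp(buf, 0) does not restore the mask
        sigaction(SIGBUS, &act, nullptr);
    }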

+ 83 - 0
roxie/ccdcache/ccdcache.hpp

@@ -0,0 +1,83 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#ifndef _CCDCACHE_INCL
+#define _CCDCACHE_INCL
+
+struct CacheInfoEntry
+{
+    //For convenience the values for PageType match the NodeX enumeration (see noteWarm).
+    //Ensure disk entries sort last so that index nodes take precedence when deduping offsets.
+    enum PageType : unsigned
+    {
+        PageTypeBranch = 0,
+        PageTypeLeaf = 1,
+        PageTypeBlob = 2,
+        PageTypeDisk = 3,
+    };
+
+    union
+    {
+        struct
+        {
+#ifndef _WIN32
+            unsigned type: 2;    // disk or the kind of index node
+            __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
+            unsigned file: 24;   // Up to 16 million files
+#else
+//Windows does not like packing bitfields with different base types - fails the static assert
+            __uint64 type: 2;    // disk or the kind of index node
+            __uint64 page: 38;   // Support file sizes up to 2^51 i.e. 2PB
+            __uint64 file: 24;   // Up to 16 million files
+#endif
+        } b;
+        __uint64 u;
+    };
+
+#ifndef _WIN32
+    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
+#elif _MSC_VER >= 1900
+    //Older versions of the windows compiler complain that CacheInfoEntry::b is not a type name
+    static_assert(sizeof(b) == sizeof(u), "Unexpected packing issue in CacheInfoEntry");
+#endif
+
+    inline CacheInfoEntry() { u = 0; }
+    inline CacheInfoEntry(unsigned _file, offset_t _pos, PageType pageType)
+    {
+        b.file = _file;
+        b.page = _pos >> pageBits;
+        b.type = pageType;
+    }
+    inline bool operator < ( const CacheInfoEntry &l) const { return u < l.u; }
+    inline bool operator <= ( const CacheInfoEntry &l) const { return u <= l.u; }
+    inline void operator++ () { b.page++; }
+
+    static constexpr unsigned pageBits = 13;  // 8k 'pages'
+};
+
+interface ICacheWarmer
+{
+    virtual void startFile(const char *filename) = 0;
+    virtual bool warmBlock(const char *fileName, NodeType nodeType, offset_t startOffset, offset_t endOffset) = 0;
+    virtual void endFile() = 0;
+    virtual void report() = 0;
+};
+
+extern bool warmOsCache(const char *cacheInfo, ICacheWarmer *warmer);
+
+
+#endif //_CCDCACHE_INCL
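
Because file occupies the most significant bits of the packed value, then page, then type, sorting CacheInfoEntry values by u orders them by file and offset, and for the same page an index-node entry sorts ahead of a PageTypeDisk entry, so a dedup pass that keeps the first occurrence preserves the more specific type. A standalone approximation of the idea (offset_t/__uint64 swapped for standard types; like the original it relies on the common union-punning and little-endian bitfield conventions):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Entry
    {
        enum PageType : unsigned { Branch = 0, Leaf = 1, Blob = 2, Disk = 3 };
        union
        {
            struct { uint64_t type: 2, page: 38, file: 24; } b;
            uint64_t u;
        };
        Entry(unsigned file_, uint64_t pos, PageType t) { b.file = file_; b.page = pos >> 13; b.type = t; }
        bool operator<(const Entry &r) const { return u < r.u; }
    };

    int main()
    {
        std::vector<Entry> v { { 1, 0x4000, Entry::Disk }, { 1, 0x4000, Entry::Leaf }, { 0, 0x2000, Entry::Disk } };
        std::sort(v.begin(), v.end());
        // Sorted order: file 0 first, then for file 1 page 2 the Leaf entry
        // precedes the Disk entry, so keep-first dedup retains the index type.
        for (const Entry &e : v)
            printf("file=%u page=%llu type=%u\n", (unsigned)e.b.file, (unsigned long long)e.b.page, (unsigned)e.b.type);
    }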

+ 32 - 70
roxie/roxiemem/roxiemem.cpp

@@ -748,10 +748,8 @@ static void *suballoc_aligned(size32_t pages, bool returnNullWhenExhausted)
                             //Check if all memory above this allocation is allocated - if so extend the HWM
                             if ((hbi == 0) && (i+1 == heapHWM))
                             {
-                                if (startHbi != 0)
-                                    heapHWM = start+1;
-                                else
-                                    heapHWM = start;
+                                //Could be some space in the first chunk - will be checked next time allocate is called.
+                                heapHWM = start+1;
                             }
 
                             if (memTraceLevel >= 2)
@@ -775,25 +773,25 @@ static void *suballoc_aligned(size32_t pages, bool returnNullWhenExhausted)
     return NULL;
 }
 
-static void subfree_aligned(void *ptr, unsigned pages = 1)
+static bool subfree_aligned(void *ptr, unsigned pages = 1)
 {
     unsigned _pages = pages;
     memsize_t offset = (char *)ptr - heapBase;
     memsize_t pageOffset = offset / HEAP_ALIGNMENT_SIZE;
     if (!pages)
     {
-        DBGLOG("RoxieMemMgr: Invalid parameter (pages=%u) to subfree_aligned", pages);
-        HEAPERROR("RoxieMemMgr: Invalid parameter (num pages) to subfree_aligned");
+        IERRLOG("RoxieMemMgr: Invalid parameter (pages=%u) to subfree_aligned", pages);
+        return false;
     }
     if (pageOffset + pages > heapTotalPages)
     {
-        DBGLOG("RoxieMemMgr: Freed area not in heap (ptr=%p)", ptr);
-        HEAPERROR("RoxieMemMgr: Freed area not in heap");
+        IERRLOG("RoxieMemMgr: Freed area not in heap (ptr=%p)", ptr);
+        return false;
     }
     if (pageOffset*HEAP_ALIGNMENT_SIZE != offset)
     {
-        DBGLOG("RoxieMemMgr: Incorrect alignment of freed area (ptr=%p)", ptr);
-        HEAPERROR("RoxieMemMgr: Incorrect alignment of freed area");
+        IERRLOG("RoxieMemMgr: Incorrect alignment of freed area (ptr=%p)", ptr);
+        return false;
     }
     if (heapNotifyUnusedEachFree)
         notifyMemoryUnused(ptr, pages*HEAP_ALIGNMENT_SIZE);
@@ -829,7 +827,10 @@ static void subfree_aligned(void *ptr, unsigned pages = 1)
         {
             heap_t prev = heapBitmap[wordOffset];
             if (unlikely((prev & mask) != 0))
-                HEAPERROR("RoxieMemMgr: Page freed twice");
+            {
+                IERRLOG("RoxieMemMgr: Page freed twice (ptr=%p)", ptr);
+                return false;
+            }
 
             heap_t next = prev | mask;
             heapBitmap[wordOffset] = next;
@@ -859,6 +860,7 @@ static void subfree_aligned(void *ptr, unsigned pages = 1)
 
     if (memTraceLevel >= 2)
         DBGLOG("RoxieMemMgr: subfree_aligned() %u pages ok - addr=%p heapLWM=%u heapHWM=%u totalPages=%u", _pages, ptr, heapLWM, heapHWM, heapTotalPages);
+    return true;
 }
 
 static void clearBits(unsigned start, unsigned len)
@@ -2766,7 +2768,7 @@ public:
 
 protected:
     static const unsigned maxRows = 16; // Maximum number of rows to allocate at once.
-    char * rows[maxRows];
+    char * rows[maxRows]; // Deliberately uninitialized
     unsigned curRow = 0;
     unsigned numRows = 0;
 };
@@ -7091,64 +7093,21 @@ protected:
         r = subrealloc_aligned(t1, 20, 60);
         ASSERT(r==(void *)(memsize_t)(maxAddr - HEAP_ALIGNMENT_SIZE*60));
         subfree_aligned(r, 60);
-        subfree_aligned(t3, 20);
+        bool ok = subfree_aligned(t3, 20);
+        ASSERT(ok);
 
         // Check some error cases
-        try
-        {
-            subfree_aligned((void*)0, 1);
-            ASSERT(false);
-        }
-        catch (IException *E)
-        {
-            StringBuffer s;
-            ASSERT(strcmp(E->errorMessage(s).str(), "RoxieMemMgr: Freed area not in heap")==0);
-            E->Release();
-        }
-        try
-        {
-            subfree_aligned((void*)(minAddr + HEAP_ALIGNMENT_SIZE / 2), 1);
-            ASSERT(false);
-        }
-        catch (IException *E)
-        {
-            StringBuffer s;
-            ASSERT(strcmp(E->errorMessage(s).str(), "RoxieMemMgr: Incorrect alignment of freed area")==0);
-            E->Release();
-        }
-        try
-        {
-            subfree_aligned((void*)(memsize_t)(minAddr + 20 * HEAP_ALIGNMENT_SIZE), 1);
-            ASSERT(false);
-        }
-        catch (IException *E)
-        {
-            StringBuffer s;
-            ASSERT(strcmp(E->errorMessage(s).str(), "RoxieMemMgr: Page freed twice")==0);
-            E->Release();
-        }
-        try
-        {
-            subfree_aligned((void*)(memsize_t)(maxAddr - 2 * HEAP_ALIGNMENT_SIZE), 3);
-            ASSERT(false);
-        }
-        catch (IException *E)
-        {
-            StringBuffer s;
-            ASSERT(strcmp(E->errorMessage(s).str(), "RoxieMemMgr: Freed area not in heap")==0);
-            E->Release();
-        }
-        try
-        {
-            subfree_aligned((void*)(memsize_t)0xbfe00000, 0);
-            ASSERT(false);
-        }
-        catch (IException *E)
-        {
-            StringBuffer s;
-            ASSERT(strcmp(E->errorMessage(s).str(), "RoxieMemMgr: Invalid parameter (num pages) to subfree_aligned")==0);
-            E->Release();
-        }
+        ok = subfree_aligned((void*)0, 1);
+        ASSERT(!ok);
+        ok = subfree_aligned((void*)(minAddr + HEAP_ALIGNMENT_SIZE / 2), 1);
+        ASSERT(!ok);
+        ok = subfree_aligned((void*)(memsize_t)(minAddr + 20 * HEAP_ALIGNMENT_SIZE), 1);
+        ASSERT(!ok);
+        ok = subfree_aligned((void*)(memsize_t)(maxAddr - 2 * HEAP_ALIGNMENT_SIZE), 3);
+        ASSERT(!ok);
+        ok = subfree_aligned((void*)(memsize_t)0xbfe00000, 0);
+        ASSERT(!ok);
+
         delete[] heapBitmap;
     }
 
@@ -7448,7 +7407,8 @@ protected:
             unsigned i = 0;
             for (;;)
             {
-                ASSERT(rm1->allocate(i++, 0) != NULL);
+                void * ret = rm1->allocate(i++, 0);
+                ASSERT(ret != NULL);
             }
         }
         catch (IException *E)
@@ -7713,6 +7673,8 @@ protected:
         Owned<IRowManager> rowManager = createRowManager(0, NULL, logctx, NULL, false);
         CountingRowAllocatorCache rowCache;
         void * memory = suballoc_aligned(1, true);
+        assertex(memory);
+
         unsigned heapFlags = 0;
         CFixedChunkedHeap dummyHeap((CChunkingRowManager*)rowManager.get(), logctx, &rowCache, 32, 0, SpillAllCost);
         FixedSizeHeaplet * heaplet = new (memory) FixedSizeHeaplet(&dummyHeap, &rowCache, 32, heapFlags);
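
With subfree_aligned now reporting problems via IERRLOG and a false return rather than raising HEAPERROR, call sites that previously relied on the exception need to check the result. A minimal sketch of the adjusted pattern (releasePages is a hypothetical caller):

    bool releasePages(void *ptr, unsigned pages)
    {
        if (!subfree_aligned(ptr, pages))
        {
            // The failure has already been logged by subfree_aligned; the caller
            // now decides whether a bad free is fatal instead of the allocator aborting.
            return false;
        }
        return true;
    }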

+ 3 - 2
roxie/topo/toposerver.cpp

@@ -338,10 +338,11 @@ int main(int argc, const char *argv[])
         E->Release();
         return EXIT_FAILURE;
     }
-    Owned<IFile> sentinelFile = createSentinelTarget();
-    removeSentinelFile(sentinelFile);
     try
     {
+        Owned<IFile> sentinelFile = createSentinelTarget();
+        removeSentinelFile(sentinelFile);
+
         for (unsigned i=0; i<(unsigned)argc; i++)
         {
             if (stricmp(argv[i], "--help")==0 ||

+ 87 - 0
system/jlib/jhash.hpp

@@ -20,8 +20,13 @@
 #ifndef JHASH_HPP
 #define JHASH_HPP
 
+#include <functional>
+#include <unordered_map>
+#include <utility>
+
 #include "platform.h"
 #include <stdio.h>
+#include "jdebug.hpp" // for get_cycles_now()
 #include "jiface.hpp"
 #include "jobserve.hpp"
 #include "jiter.hpp"
@@ -598,4 +603,86 @@ public:
 };
 
 
+/* 
+ * An HT/Cache implementation whose items are only valid for a defined timeout period (default 30 seconds)
+ * Example use:
+ *   CTimeLimitedCache<std::string, Owned<IPropertyTree>> myCache(10);
+ *   Owned<IPropertyTree> match;
+ *   verifyex( !myCache.get("tree1", match) );
+ *   myCache.add("tree1", createPTree("tree1"));
+ *   verifyex( nullptr != myCache.query("tree1") );
+ *   MilliSleep(11000); // sleep until "tree1" has expired
+ *   verifyex( nullptr == myCache.query("tree1") );
+ *
+ *   myCache.ensure("tree2", [](std::string key) { return createPTree(key.c_str()); });
+ *
+ * Keywords: cache,time,limit,hashtable,hash
+ */
+
+template <class KEYTYPE, class VALUETYPE>
+class jlib_decl CTimeLimitedCache
+{
+public:
+    CTimeLimitedCache<KEYTYPE, VALUETYPE>(unsigned timeoutMs=defaultCacheTimeoutMs)
+    {
+        timeoutPeriodCycles = ((cycle_t)timeoutMs) * queryOneSecCycles() / 1000;
+    }
+    VALUETYPE *query(KEYTYPE key, bool touch=false)
+    {
+        CacheElement *match = getMatch(key, touch);
+        if (!match)
+            return nullptr;
+        return &match->second;
+    }
+    bool get(KEYTYPE key, VALUETYPE &result, bool touch=false)
+    {
+        VALUETYPE *res = query(key, touch);
+        if (!res)
+            return false;
+        result = *res;
+        return true;
+    }
+    VALUETYPE &add(KEYTYPE key, VALUETYPE val)
+    {
+        auto &ref = ht[key];
+        ref = std::make_pair(get_cycles_now(), val);
+        return ref.second;
+    }
+    VALUETYPE &ensure(KEYTYPE key, std::function<VALUETYPE (KEYTYPE k)> func)
+    {
+        VALUETYPE *res = query(key);
+        if (res)
+            return *res;
+        return add(key, func(key));
+    }
+    void kill()
+    {
+        // NB: std::unordered_map::clear() destroys the elements but need not release the bucket memory; swapping with a local map ensures it is freed when the local goes out of scope
+        std::unordered_map<KEYTYPE, CacheElement> empty;
+        empty.swap(ht);
+    }
+
+private:
+    static constexpr unsigned defaultCacheTimeoutMs = 30000;
+    typedef std::pair<cycle_t, VALUETYPE> CacheElement;
+    cycle_t timeoutPeriodCycles = 0;
+    std::unordered_map<KEYTYPE, CacheElement> ht;
+
+    CacheElement *getMatch(KEYTYPE key, bool touch)
+    {
+        auto it = ht.find(key);
+        if (it == ht.end())
+            return nullptr;
+        cycle_t nowCycles = get_cycles_now();
+        if ((nowCycles - it->second.first) > timeoutPeriodCycles) // NB: rollover is okay
+        {
+            ht.erase(it);
+            return nullptr;
+        }
+        if (touch)
+            it->second.first = nowCycles;
+        return &it->second;
+    }
+};
+
 #endif
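
Note that CTimeLimitedCache contains no locking of its own, so concurrent callers need their own serialization. A hedged sketch of the ensure() memoization pattern the header comment alludes to, assuming jhash.hpp is on the include path (expensiveLookup and resolve are hypothetical):

    #include "jhash.hpp"
    #include <string>

    // Hypothetical expensive resolver - stands in for e.g. a remote lookup.
    std::string expensiveLookup(const std::string &key);

    static CTimeLimitedCache<std::string, std::string> resolverCache(5000); // entries live for 5 seconds

    std::string resolve(const std::string &key)
    {
        // Returns the cached value if present and unexpired; otherwise invokes
        // the functor, caches its result for the next 5 seconds, and returns it.
        return resolverCache.ensure(key, [](std::string k) { return expensiveLookup(k); });
    }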

+ 0 - 2
system/jlib/jlib.hpp

@@ -275,8 +275,6 @@ enum DAFSConnectCfg { SSLNone = 0, SSLOnly, SSLFirst, UnsecureFirst };
 
 #include "jstring.hpp"
 #include "jarray.hpp"
-#include "jhash.hpp"
-#include "jstream.hpp"
 #include "jutil.hpp"
 
 template <class ARRAY>

+ 1 - 0
system/jlib/jlog.hpp

@@ -856,6 +856,7 @@ constexpr LogMsgCategory MCauditInfo(MSGAUD_audit, MSGCLS_information, AudMsgThr
 constexpr LogMsgCategory MCstats(MSGAUD_operator, MSGCLS_progress, ProgressMsgThreshold);
 constexpr LogMsgCategory MCoperatorInfo(MSGAUD_operator, MSGCLS_information, InfoMsgThreshold);
 constexpr LogMsgCategory MCmetrics(MSGAUD_operator, MSGCLS_metric, ErrMsgThreshold);
+constexpr LogMsgCategory MCExtraneousInfo(MSGAUD_programmer, MSGCLS_information, ExtraneousMsgThreshold);
 
 /*
  * Function to determine log level (detail) for exceptions, based on log message class

+ 2 - 0
testing/regress/ecl/dropzone_query_test.ecl

@@ -22,3 +22,5 @@ import Std.File AS FileServices;
 output(NOTHOR(FileServices.GetDefaultDropZone()));
 
 output(NOTHOR(FileServices.GetDropZones()));
+
+output(NOTHOR(FileServices.GetLandingZones()));

+ 12 - 5
testing/regress/ecl/memcachedtest.ecl

@@ -87,12 +87,19 @@ SEQUENTIAL(
 
 SEQUENTIAL(
     memcached.Clear(servers);
+    // There is no hi-res timer in memcached, so all timeouts should be handled with +/-1 s tolerance
     memcached.SetString('testExpire', 'foobar', servers,, 10);
-    memcached.Exists('testExpire', servers);
-    Std.System.Debug.Sleep(9 * 1000);
-    memcached.Exists('testExpire', servers);
-    Std.System.Debug.Sleep(2 * 1000);
-    memcached.Exists('testExpire', servers);
+    memcached.Exists('testExpire', servers);	// Result 14
+    
+    // Wait 8 sec before testing again
+    Std.System.Debug.Sleep(8 * 1000);
+    // It should still exist at this point
+    memcached.Exists('testExpire', servers);	// Result 15
+    
+    // Wait 4 sec before testing again
+    Std.System.Debug.Sleep(4 * 1000);
+    // It shouldn't exist at this point
+    memcached.Exists('testExpire', servers);	// Result 16
     );
 
 SEQUENTIAL(

+ 1 - 1
testing/regress/environment.xml.in

@@ -86,7 +86,7 @@
                 logQueueLen="512"
                 lowTimeout="10000"
                 maxBlockSize="10000000"
-                maxHttpConnectionRequests="10"
+                maxHttpConnectionRequests="0"
                 maxHttpKeepAliveWait="5000"
                 maxLocalFilesOpen="4000"
                 maxLockAttempts="5"

+ 34 - 0
testing/unittests/jlibtests.cpp

@@ -2575,6 +2575,40 @@ public:
 CPPUNIT_TEST_SUITE_REGISTRATION( JlibStatsTest );
 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( JlibStatsTest, "JlibStatsTest" );
 
+class HashTableTests : public CppUnit::TestFixture
+{
+    CPPUNIT_TEST_SUITE( HashTableTests );
+        CPPUNIT_TEST(testTimedCache);
+    CPPUNIT_TEST_SUITE_END();
+
+    void testTimedCache()
+    {
+        unsigned hv = 0;
+        unsigned __int64 inputHvSum = 0;
+        unsigned __int64 lookupHvSum = 0;
+        CTimeLimitedCache<unsigned, unsigned> cache(100); // 100ms timeout
+        for (unsigned i=0; i<10; i++)
+        {
+            hv = hashc((const byte *)&i,sizeof(i), hv);
+            inputHvSum += hv;
+            cache.add(i, hv);
+            unsigned lookupHv;
+            CPPUNIT_ASSERT(cache.get(i, lookupHv));
+            lookupHvSum += lookupHv;
+        }
+        CPPUNIT_ASSERT(inputHvSum == lookupHvSum);
+        MilliSleep(50);
+        CPPUNIT_ASSERT(nullptr != cache.query(0, true)); // touch
+        MilliSleep(60);
+        // all except 0, which was touched, should have expired
+        CPPUNIT_ASSERT(nullptr != cache.query(0));
+        for (unsigned i=1; i<10; i++)
+            CPPUNIT_ASSERT(nullptr == cache.query(i));
+    }
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION( HashTableTests );
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( HashTableTests, "HashTableTests" );
 
 
 #endif // _USE_CPPUNIT

+ 2 - 1
thorlcr/activities/diskread/thdiskreadslave.cpp

@@ -88,7 +88,7 @@ protected:
 
         unsigned projectedFormatCrc = helper->getProjectedFormatCrc();
         IOutputMetaData *projectedFormat = helper->queryProjectedDiskRecordSize();
-        return ::getTranslators("rowstream", expectedFormatCrc, expectedFormat, publishedFormatCrc, publishedFormat, projectedFormatCrc, projectedFormat, translationMode);
+        return ::getTranslators(logicalFilename, expectedFormatCrc, expectedFormat, publishedFormatCrc, publishedFormat, projectedFormatCrc, projectedFormat, translationMode);
     }
 public:
     CDiskReadSlaveActivityRecord(CGraphElementBase *_container, IHThorArg *_helper=NULL) 
@@ -411,6 +411,7 @@ void CDiskRecordPartHandler::close(CRC32 &fileCRC)
     if (partStream)
     {
         mergeStats(fileStats, partStream);
+        activity.mergeSubFileStats(partDesc, partStream);
         partStream->stop(&fileCRC);
     }
 }

+ 1 - 1
thorlcr/activities/indexread/thindexreadslave.cpp

@@ -142,7 +142,7 @@ protected:
         unsigned expectedFormatCrc = helper->getDiskFormatCrc();
         IOutputMetaData *expectedFormat = helper->queryDiskRecordSize();
 
-        Owned<ITranslator> ret = ::getTranslators("rowstream", expectedFormatCrc, expectedFormat, publishedFormatCrc, publishedFormat, projectedFormatCrc, projectedFormat, translationMode);
+        Owned<ITranslator> ret = ::getTranslators(logicalFilename, expectedFormatCrc, expectedFormat, publishedFormatCrc, publishedFormat, projectedFormatCrc, projectedFormat, translationMode);
         if (!ret)
             return nullptr;
         if (!ret->queryTranslator().canTranslate())

+ 38 - 9
thorlcr/activities/thdiskbase.cpp

@@ -68,20 +68,21 @@ void CDiskReadMasterBase::init()
             mapping.setown(getFileSlaveMaps(file->queryLogicalName(), *fileDesc, container.queryJob().queryUserDescriptor(), container.queryJob().querySlaveGroup(), local, false, hash, file->querySuperFile()));
             addReadFile(file, temp);
         }
+        IDistributedSuperFile *super = file->querySuperFile();
+        unsigned numsubs = super?super->numSubFiles(true):0;
         if (0 != (helper->getFlags() & TDRfilenamecallback)) // only get/serialize if using virtual file name fields
         {
-            IDistributedSuperFile *super = file->querySuperFile();
-            if (super)
+            for (unsigned s=0; s<numsubs; s++)
             {
-                unsigned numsubs = super->numSubFiles(true);
-                unsigned s=0;
-                for (; s<numsubs; s++)
-                {
-                    IDistributedFile &subfile = super->querySubFile(s, true);
-                    subfileLogicalFilenames.append(subfile.queryLogicalName());
-                }
+                IDistributedFile &subfile = super->querySubFile(s, true);
+                subfileLogicalFilenames.append(subfile.queryLogicalName());
             }
         }
+        if (0==(helper->getFlags() & TDXtemporary))
+        {
+            for (unsigned i=0; i<numsubs; i++)
+                subFileStats.push_back(new CThorStatsCollection(diskReadRemoteStatistics));
+        }
         void *ekey;
         size32_t ekeylen;
         helper->getEncryptKey(ekeylen,ekey);
@@ -116,6 +117,34 @@ void CDiskReadMasterBase::serializeSlaveData(MemoryBuffer &dst, unsigned slave)
         CSlavePartMapping::serializeNullMap(dst);
 }
 
+void CDiskReadMasterBase::done()
+{
+    if (!subFileStats.empty())
+    {
+        unsigned numSubFiles = subFileStats.size();
+        for (unsigned i=0; i<numSubFiles; i++)
+        {
+            IDistributedFile *file = queryReadFile(i);
+            if (file)
+                file->addAttrValue("@numDiskReads", subFileStats[i]->getStatisticSum(StNumDiskReads));
+        }
+    }
+    else
+    {
+        IDistributedFile *file = queryReadFile(0);
+        if (file)
+            file->addAttrValue("@numDiskReads", statsCollection.getStatisticSum(StNumDiskReads));
+    }
+    CMasterActivity::done();
+}
+
+void CDiskReadMasterBase::deserializeStats(unsigned node, MemoryBuffer &mb)
+{
+    CMasterActivity::deserializeStats(node, mb);
+
+    for (auto &stats: subFileStats)
+        stats->deserialize(node, mb);
+}
 /////////////////
 
 void CWriteMasterBase::init()

+ 5 - 2
thorlcr/activities/thdiskbase.ipp

@@ -32,12 +32,15 @@ protected:
     Owned<CSlavePartMapping> mapping;
     IHash *hash;
     StringAttr fileName;
+    std::vector<OwnedPtr<CThorStatsCollection>> subFileStats;
 
 public:
     CDiskReadMasterBase(CMasterGraphElement *info);
-    virtual void init();
-    virtual void serializeSlaveData(MemoryBuffer &dst, unsigned slave);
+    virtual void init() override;
+    virtual void serializeSlaveData(MemoryBuffer &dst, unsigned slave) override;
+    virtual void done() override;
     virtual void validateFile(IDistributedFile *file) { }
+    virtual void deserializeStats(unsigned node, MemoryBuffer &mb) override;
 };
 
 class CWriteMasterBase : public CMasterActivity

+ 29 - 5
thorlcr/activities/thdiskbaseslave.cpp

@@ -71,7 +71,6 @@ CDiskPartHandlerBase::CDiskPartHandlerBase(CDiskReadSlaveActivityBase &_activity
     eoi = false;
     kindStr = activityKindStr(activity.queryContainer().getKind());
     compressed = blockCompressed = firstInGroup = checkFileCrc = false;
-
 }
 
 void CDiskPartHandlerBase::setPart(IPartDescriptor *_partDesc)
@@ -235,13 +234,36 @@ void CDiskReadSlaveActivityBase::init(MemoryBuffer &data, MemoryBuffer &slaveDat
     {
         deserializePartFileDescriptors(data, partDescs);
 
-        // put temp files in individual slave temp dirs (incl port)
-        if ((helper->getFlags() & TDXtemporary) && (!container.queryJob().queryUseCheckpoints()))
-            partDescs.item(0).queryOwner().setDefaultDir(queryTempDir());
+        if (helper->getFlags() & TDXtemporary)
+        {
+            // put temp files in individual slave temp dirs (incl port)
+            if (!container.queryJob().queryUseCheckpoints())
+                partDescs.item(0).queryOwner().setDefaultDir(queryTempDir());
+        }
+        else
+        {
+            ISuperFileDescriptor *super = partDescs.item(0).queryOwner().querySuperFileDescriptor();
+            if (super)
+            {
+                unsigned numSubFiles = super->querySubFiles();
+                for (unsigned i=0; i<numSubFiles; i++)
+                    subFileStats.push_back(new CRuntimeStatisticCollection(diskReadRemoteStatistics));
+            }
+        }
     }
     gotMeta = false; // if variable filename and inside loop, need to invalidate cached meta
 }
-
+void CDiskReadSlaveActivityBase::mergeSubFileStats(IPartDescriptor *partDesc, IExtRowStream *partStream)
+{
+    if (subFileStats.size()>0)
+    {
+        ISuperFileDescriptor * superFDesc = partDesc->queryOwner().querySuperFileDescriptor();
+        dbgassertex(superFDesc);
+        unsigned subfile, lnum;
+        if(superFDesc->mapSubPart(partDesc->queryPartIndex(), subfile, lnum))
+            mergeStats(*subFileStats[subfile], partStream);
+    }
+}
 const char *CDiskReadSlaveActivityBase::queryLogicalFilename(unsigned index)
 {
     return subfileLogicalFilenames.item(index);
@@ -300,6 +322,8 @@ void CDiskReadSlaveActivityBase::serializeStats(MemoryBuffer &mb)
     }
     stats.setStatistic(StNumDiskRowsRead, diskProgress);
     PARENT::serializeStats(mb);
+    for (auto &stats: subFileStats)
+        stats->serialize(mb);
 }
 
 

+ 2 - 1
thorlcr/activities/thdiskbaseslave.ipp

@@ -92,14 +92,15 @@ protected:
     Owned<CDiskPartHandlerBase> partHandler;
     Owned<IExpander> eexp;
     rowcount_t diskProgress = 0;
+    std::vector<OwnedPtr<CRuntimeStatisticCollection>> subFileStats;
 
 public:
     CDiskReadSlaveActivityBase(CGraphElementBase *_container, IHThorArg *_helper);
     const char *queryLogicalFilename(unsigned index);
     IThorRowInterfaces * queryProjectedDiskRowInterfaces();
+    void mergeSubFileStats(IPartDescriptor *partDesc, IExtRowStream *partStream);
     virtual void start() override;
 
-    
 // IThorSlaveActivity
     virtual void init(MemoryBuffer &data, MemoryBuffer &slaveData);
     virtual void kill();

+ 22 - 4
thorlcr/master/thmastermain.cpp

@@ -72,6 +72,20 @@
 #define SHUTDOWN_IN_PARALLEL 20
 
 
+/* These percentages are used to determine the amount of memory roxiemem allocates
+ * from total system memory.
+ *
+ * For historical reasons the default in bare-metal has always been a
+ * conservative 75%.
+ *
+ * NB: These percentages do not apply if the memory amount has been configured
+ * manually via 'globalMemorySize' and 'masterMemorySize'
+ */
+
+static constexpr unsigned bareMetalRoxieMemPC = 75;
+static constexpr unsigned containerRoxieMemPC = 90;
+
+
 class CThorEndHandler : implements IThreaded
 {
     CThreaded threaded;
@@ -736,11 +750,13 @@ int main( int argc, const char *argv[]  )
         unsigned gmemSize = globals->getPropInt("@globalMemorySize"); // in MB
         if (0 == gmemSize)
         {
+            // NB: This could be inside an isContainerized() check, but the 'workerResources' section only applies to containerized setups
             const char *workerResourcedMemory = globals->queryProp("workerResources/@memory");
             if (!isEmptyString(workerResourcedMemory))
             {
                 offset_t sizeBytes = friendlyStringToSize(workerResourcedMemory);
                 gmemSize = (unsigned)(sizeBytes / 0x100000);
+                gmemSize = gmemSize * containerRoxieMemPC / 100;
             }
             else
             {
@@ -765,17 +781,17 @@ int main( int argc, const char *argv[]  )
 #endif            
 #endif
 #endif
-#ifndef _CONTAINERIZED
+#ifdef _CONTAINERIZED
+                gmemSize = maxMem * containerRoxieMemPC / 100; // NB: in MB
+#else
                 if (globals->getPropBool("@localThor") && 0 == mmemSize)
                 {
                     gmemSize = maxMem / 2; // 50% of total for slaves
                     mmemSize = maxMem / 4; // 25% of total for master
                 }
                 else
+                    gmemSize = maxMem * bareMetalRoxieMemPC / 100; // NB: in MB
 #endif
-                {
-                    gmemSize = maxMem * 3 / 4; // 75% of total for slaves
-                }
             }
             unsigned perSlaveSize = gmemSize;
 #ifndef _CONTAINERIZED
@@ -796,11 +812,13 @@ int main( int argc, const char *argv[]  )
         }
         if (0 == mmemSize)
         {
+            // NB: This could be inside an isContainerized() check, but the 'managerResources' section only applies to containerized setups
             const char *managerResourcedMemory = globals->queryProp("managerResources/@memory");
             if (!isEmptyString(managerResourcedMemory))
             {
                 offset_t sizeBytes = friendlyStringToSize(managerResourcedMemory);
                 mmemSize = (unsigned)(sizeBytes / 0x100000);
+                mmemSize = mmemSize * containerRoxieMemPC / 100;
             }
             else
                 mmemSize = gmemSize; // default to same as slaves
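
As a worked example of the containerized sizing above (values hypothetical; friendlyStringToSize is jlib's parser for strings such as "4Gi"), a worker resourced with 4Gi yields 4096MB, of which roxiemem is given 90%:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint64_t sizeBytes = 4ULL * 1024 * 1024 * 1024;        // workerResources/@memory = "4Gi"
        unsigned gmemSize = (unsigned)(sizeBytes / 0x100000);  // 4096 (MB)
        gmemSize = gmemSize * 90 / 100;                        // containerRoxieMemPC -> 3686 MB for roxiemem
        printf("roxiemem: %u MB of the 4096 MB resourced\n", gmemSize);
    }

The same arithmetic applies to mmemSize via managerResources/@memory; on bare metal the multiplier is bareMetalRoxieMemPC (75) instead.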