Browse Source

Merge pull request #10819 from richardkchapman/hpcc13026

HPCC-13026 Allow eclcc to do a better job of resourcing smaller clusters

Reviewed-by: Gavin Halliday <ghalliday@hpccsystems.com>
Gavin Halliday 7 years ago
parent
commit
ecf704dae4
6 changed files with 89 additions and 22 deletions
  1. 69 22
      ecl/eclcc/eclcc.cpp
  2. 1 0
      ecl/eclcc/eclcc.hpp
  3. 1 0
      ecl/eclccserver/eclccserver.cpp
  4. 7 0
      ecl/hql/hql.hpp
  5. 3 0
      ecl/hqlcpp/hqlecl.cpp
  6. 8 0
      ecl/hqlcpp/hqlhtcpp.cpp

+ 69 - 22
ecl/eclcc/eclcc.cpp

@@ -257,11 +257,15 @@ public:
 
     // interface ICodegenContextCallback
 
-    virtual void noteCluster(const char *clusterName);
-    virtual bool allowAccess(const char * category, bool isSigned);
+    virtual void noteCluster(const char *clusterName) override;
+    virtual void pushCluster(const char *clusterName) override;
+    virtual void popCluster() override;
+    virtual bool allowAccess(const char * category, bool isSigned) override;
     virtual IHqlExpression *lookupDFSlayout(const char *filename, IErrorReceiver &errs, const ECLlocation &location, bool isOpt) const override;
+    virtual unsigned lookupClusterSize() const override;
 
 protected:
+    bool checkDaliConnected() const;
     void addFilenameDependency(StringBuffer & target, EclCompileInstance & instance, const char * filename);
     void applyApplicationOptions(IWorkUnit * wu);
     void applyDebugOptions(IWorkUnit * wu);
@@ -320,10 +324,13 @@ protected:
     StringAttr optQueryRepositoryReference;
     StringAttr optComponentName;
     StringAttr optDFS;
+    StringAttr optCluster;
     StringAttr optScope;
     StringAttr optUser;
     StringAttr optPassword;
     StringAttr optWUID;
+    StringArray clusters;
+    mutable int prevClusterSize = -1;  // i.e. not cached
     FILE * batchLog = nullptr;
 
     StringAttr optManifestFilename;
@@ -2013,20 +2020,21 @@ void EclCC::noteCluster(const char *clusterName)
 {
 }
 
-IHqlExpression *EclCC::lookupDFSlayout(const char *filename, IErrorReceiver &errs, const ECLlocation &location, bool isOpt) const
+void EclCC::pushCluster(const char *clusterName)
+{
+    clusters.append(clusterName);
+    prevClusterSize = -1;  // i.e. not cached
+}
+
+void EclCC::popCluster()
+{
+    clusters.pop();
+    prevClusterSize = -1;  // i.e. not cached
+}
+
+
+bool EclCC::checkDaliConnected() const
 {
-    CriticalBlock b(dfsCrit);  // Overkill at present but maybe one day codegen will start threading?
-    if (!optDFS || disconnectReported)
-    {
-        // Dali lookup disabled, yet translation requested. Should we report if OPT set?
-        if (!(optArchive || optGenerateDepend || optSyntax || optGenerateMeta || optEvaluateResult || disconnectReported))
-        {
-            VStringBuffer msg("Error looking up file %s in DFS - DFS not configured", filename);
-            errs.reportWarning(CategoryDFS, HQLWRN_DFSlookupFailure, msg.str(), str(location.sourcePath), location.lineno, location.column, location.position);
-            disconnectReported = true;
-        }
-        return nullptr;
-    }
     if (!daliConnected)
     {
         try
@@ -2034,10 +2042,8 @@ IHqlExpression *EclCC::lookupDFSlayout(const char *filename, IErrorReceiver &err
             Owned<IGroup> serverGroup = createIGroup(optDFS.str(), DALI_SERVER_PORT);
             if (!initClientProcess(serverGroup, DCR_EclCC, 0, NULL, NULL, optDaliTimeout))
             {
-                VStringBuffer msg("Error looking up file %s in DFS - failed to connect to %s", filename, optDFS.str());
-                errs.reportError(HQLWRN_DFSlookupFailure, msg.str(), str(location.sourcePath), location.lineno, location.column, location.position);
                 disconnectReported = true;
-                return nullptr;
+                return false;
             }
             if (!optUser.isEmpty())
             {
@@ -2047,16 +2053,54 @@ IHqlExpression *EclCC::lookupDFSlayout(const char *filename, IErrorReceiver &err
         }
         catch (IException *E)
         {
-            StringBuffer emsg;
-            VStringBuffer msg("Error looking up file %s in DFS - failed to connect to %s (%s)", filename, optDFS.str(), E->errorMessage(emsg).str());
             E->Release();
-            errs.reportError(HQLWRN_DFSlookupFailure, msg.str(), str(location.sourcePath), location.lineno, location.column, location.position);
             disconnectReported = true;
-            return nullptr;
+            return false;
         }
         daliConnected = true;
     }
+    return true;
+}
+
+unsigned EclCC::lookupClusterSize() const
+{
+    CriticalBlock b(dfsCrit);  // Overkill at present but maybe one day codegen will start threading? If it does the stack is also iffy!
+    if (!optDFS || disconnectReported || !checkDaliConnected())
+        return 0;
+    if (prevClusterSize != -1)
+        return (unsigned) prevClusterSize;
+    const char *cluster = clusters ? clusters.tos() : optCluster.str();
+    if (isEmptyString(cluster) || strieq(cluster, "<unknown>"))
+        prevClusterSize = 0;
+    else
+    {
+        Owned<IConstWUClusterInfo> clusterInfo = getTargetClusterInfo(cluster);
+        prevClusterSize = clusterInfo ? clusterInfo->getSize() : 0;
+    }
+    DBGLOG("Cluster %s has size %d", cluster, prevClusterSize);
+    return prevClusterSize;
+}
 
+IHqlExpression *EclCC::lookupDFSlayout(const char *filename, IErrorReceiver &errs, const ECLlocation &location, bool isOpt) const
+{
+    CriticalBlock b(dfsCrit);  // Overkill at present but maybe one day codegen will start threading?
+    if (!optDFS || disconnectReported)
+    {
+        // Dali lookup disabled, yet translation requested. Should we report if OPT set?
+        if (!(optArchive || optGenerateDepend || optSyntax || optGenerateMeta || optEvaluateResult || disconnectReported))
+        {
+            VStringBuffer msg("Error looking up file %s in DFS - DFS not configured", filename);
+            errs.reportWarning(CategoryDFS, HQLWRN_DFSlookupFailure, msg.str(), str(location.sourcePath), location.lineno, location.column, location.position);
+            disconnectReported = true;
+        }
+        return nullptr;
+    }
+    if (!checkDaliConnected())
+    {
+        VStringBuffer msg("Error looking up file %s in DFS - failed to connect to %s", filename, optDFS.str());
+        errs.reportError(HQLWRN_DFSlookupFailure, msg.str(), str(location.sourcePath), location.lineno, location.column, location.position);
+        return nullptr;
+    }
     // Do any scope manipulation
     StringBuffer lookupName;  // do NOT move inside the curly braces below - this needs to stay in scope longer than that
     if (filename[0]=='~')
@@ -2238,6 +2282,9 @@ int EclCC::parseCommandLineOptions(int argc, const char* argv[])
         else if (iter.matchFlag(optCheckDirty, "-checkDirty"))
         {
         }
+        else if (iter.matchOption(optCluster, "-cluster"))
+        {
+        }
         else if (iter.matchOption(optDFS, "-dfs") || /*deprecated*/ iter.matchOption(optDFS, "-dali"))
         {
             // Note - we wait until first use before actually connecting to dali

+ 1 - 0
ecl/eclcc/eclcc.hpp

@@ -68,6 +68,7 @@ const char * const helpText[] = {
     "File resolution options:",
     "    -dfs=ip       Use specified ip for DFS filename resolution",
     "    -scope=prefix Use specified scope prefix in DFS filename resolution",
+    "    -cluster=name Use specified cluster when calculating graph resource allocation",
     "    -user=id      Use specified username in DFS filename resolution",
     "    -password=xxx Use specified password in DFS filename resolution (blank to prompt)",
     "",

+ 1 - 0
ecl/eclccserver/eclccserver.cpp

@@ -323,6 +323,7 @@ class EclccCompileThread : implements IPooledThread, implements IErrorReporter,
             const char *wuScope = workunit->queryWuScope();
             if (!isEmptyString(wuScope))
                 eclccCmd.appendf(" -scope=%s", wuScope);
+            eclccCmd.appendf(" -cluster=%s", targetCluster);
             SCMStringBuffer token;
             workunit->getSecurityToken(token);
             if (token.length())

+ 7 - 0
ecl/hql/hql.hpp

@@ -195,6 +195,8 @@ interface IEclRepositoryCallback : public IEclRepository
 interface ICodegenContextCallback : public IInterface
 {
     virtual void noteCluster(const char *clusterName) = 0;
+    virtual void pushCluster(const char *clusterName) = 0;
+    virtual void popCluster() = 0;
     virtual bool allowAccess(const char * category, bool isSigned) = 0;
     /**
      * Lookup a file in DFS and return the record definition
@@ -204,6 +206,11 @@ interface ICodegenContextCallback : public IInterface
      * @param location      Location to use when reporting errors
      */
     virtual IHqlExpression *lookupDFSlayout(const char *filename, IErrorReceiver &errs, const ECLlocation &location, bool isOpt) const = 0;
+    /**
+     * Return the number of nodes in the current cluster, determined via a Dali lookup.
+     * Returns 0 if the cluster size cannot be determined.
+     */
+    virtual unsigned lookupClusterSize() const = 0;
 };
 
 

+ 3 - 0
ecl/hqlcpp/hqlecl.cpp

@@ -53,8 +53,11 @@ class NullContextCallback : implements ICodegenContextCallback, public CInterfac
     IMPLEMENT_IINTERFACE
 
     virtual void noteCluster(const char *clusterName) override {}
+    virtual void pushCluster(const char *clusterName) override {}
+    virtual void popCluster() override {}
     virtual bool allowAccess(const char * category, bool isSigned) override { return true; }
     virtual IHqlExpression *lookupDFSlayout(const char *filename, IErrorReceiver &errs, const ECLlocation &location, bool isOpt) const override { return nullptr; }
+    virtual unsigned lookupClusterSize() const override { return 0; }
 };
 
 class HqlDllGenerator : implements IHqlExprDllGenerator, implements IAbortRequestCallback, public CInterface

+ 8 - 0
ecl/hqlcpp/hqlhtcpp.cpp

@@ -7723,7 +7723,12 @@ void HqlCppTranslator::pushCluster(BuildCtx & ctx, IHqlExpression * cluster)
     StringBuffer clusterText;
     getStringValue(clusterText, cluster);
     if (clusterText.length())
+    {
         ctxCallback->noteCluster(clusterText.str());
+        ctxCallback->pushCluster(clusterText.str());
+    }
+    else
+        ctxCallback->pushCluster("<unknown>");
 }
 
 
@@ -7731,6 +7736,7 @@ void HqlCppTranslator::popCluster(BuildCtx & ctx)
 {
     HqlExprArray args;
     callProcedure(ctx, restoreClusterId, args);
+    ctxCallback->popCluster();
 }
 
 
@@ -9445,6 +9451,8 @@ IHqlExpression * HqlCppTranslator::getResourcedGraph(IHqlExpression * expr, IHql
     unsigned numNodes = 0;
     if (options.specifiedClusterSize != 0)
         numNodes = options.specifiedClusterSize;
+    else
+        numNodes = ctxCallback->lookupClusterSize();
 
     traceExpression("BeforeResourcing", resourced);