Explorar o código

HPCC-26610 Write file access cost into workunit

Signed-off-by: Shamser Ahmed <shamser.ahmed@lexisnexis.com>
Shamser Ahmed %!s(int64=3) %!d(string=hai) anos
pai
achega
c662e22e3e

+ 1 - 1
dali/base/dadfs.cpp

@@ -177,7 +177,7 @@ static IPropertyTree *getEmptyAttr()
     return createPTree("Attr");
 }
 
-static double calcFileCost(const char * cluster, double sizeGB, double fileAgeDays, __int64 numDiskWrites, __int64 numDiskReads)
+extern da_decl double calcFileCost(const char * cluster, double sizeGB, double fileAgeDays, __int64 numDiskWrites, __int64 numDiskReads)
 {
     Owned<IPropertyTree> plane = getStoragePlane(cluster);
     Owned<IPropertyTree> global;

+ 1 - 1
dali/base/dadfs.hpp

@@ -849,7 +849,7 @@ inline const char *queryFileKind(IFileDescriptor *f) { return queryFileKind(f->q
 extern da_decl void ensureFileScope(const CDfsLogicalFileName &dlfn, unsigned timeoutms=INFINITE);
 
 extern da_decl bool checkLogicalName(const char *lfn,IUserDescriptor *user,bool readreq,bool createreq,bool allowquery,const char *specialnotallowedmsg);
-
+extern da_decl double calcFileCost(const char * cluster, double sizeGB, double fileAgeDays, __int64 numDiskWrites, __int64 numDiskReads);
 constexpr bool defaultPrivilegedUser = true;
 constexpr bool defaultNonPrivilegedUser = false;
 

+ 12 - 0
system/jlib/jptree.cpp

@@ -8929,7 +8929,19 @@ static std::tuple<std::string, IPropertyTree *, IPropertyTree *> doLoadConfigura
 
     //For legacy (and other weird cases) ensure there is a global section
     if (!newGlobalConfig)
+    {
         newGlobalConfig.setown(createPTree("global"));
+#ifndef _CONTAINERIZED
+        // The cost PT needs to be in the global config.  In bare-metal, the cost PT is generated in
+        // the component's xml file.  Copying cost PT from component to the global config
+        IPropertyTree * costPT = newComponentConfig->queryPropTree("cost");
+        if (costPT)
+        {
+            IPropertyTree * globalCostPT = newGlobalConfig->addPropTree("cost");
+            mergeConfiguration(*globalCostPT, *costPT, nullptr, false);
+        }
+#endif
+    }
 
 #ifdef _DEBUG
     // NB: don't re-hold, if CLI --hold already held.

+ 1 - 0
system/jlib/jstatcodes.h

@@ -240,6 +240,7 @@ enum StatisticKind
     StSizeAgentReply,
     StTimeAgentWait,
     StCycleAgentWaitCycles,
+    StCostFileAccess,
     StMax,
 
     //For any quantity there is potentially the following variants.

+ 3 - 2
system/jlib/jstats.cpp

@@ -778,7 +778,7 @@ extern jlib_decl StatsMergeAction queryMergeMode(StatisticKind kind)
 #define IPV4STAT(y) STAT(IPV4, y, SMeasureIPV4)
 #define CYCLESTAT(y) St##Cycle##y##Cycles, SMeasureCycle, St##Time##y, St##Cycle##y##Cycles, { NAMES(Cycle, y##Cycles) }, { TAGS(Cycle, y##Cycles) }
 #define ENUMSTAT(y) STAT(Enum, y, SMeasureEnum)
-
+#define COSTSTAT(y) STAT(Cost, y, SMeasureCost)
 //--------------------------------------------------------------------------------------------------------------------
 
 class StatisticMeta
@@ -912,10 +912,11 @@ static const StatisticMeta statsMetaData[StMax] = {
     { SIZESTAT(OsDiskWrite) },
     { TIMESTAT(Blocked) },
     { CYCLESTAT(Blocked) },
-    { STAT(Cost, Execute, SMeasureCost) },
+    { COSTSTAT(Execute) },
     { SIZESTAT(AgentReply) },
     { TIMESTAT(AgentWait) },
     { CYCLESTAT(AgentWait) },
+    { COSTSTAT(FileAccess) }
 };
 
 //Is a 0 value likely, and useful to be reported if it does happen to be zero?

+ 1 - 16
thorlcr/activities/fetch/thfetch.cpp

@@ -108,22 +108,7 @@ public:
     }
     virtual void done() override
     {
-        if (!subFileStats.empty())
-        {
-            unsigned numSubFiles = subFileStats.size();
-            for (unsigned i=0; i<numSubFiles; i++)
-            {
-                IDistributedFile *file = queryReadFile(i);
-                if (file)
-                    file->addAttrValue("@numDiskReads", subFileStats[i]->getStatisticSum(StNumDiskReads));
-            }
-        }
-        else
-        {
-            IDistributedFile *file = queryReadFile(0);
-            if (file)
-                file->addAttrValue("@numDiskReads", statsCollection.getStatisticSum(StNumDiskReads));
-        }
+        updateFileReadCostStats(subFileStats);
         CMasterActivity::done();
     }
 };

+ 1 - 16
thorlcr/activities/indexread/thindexread.cpp

@@ -299,22 +299,7 @@ public:
     }
     virtual void done() override
     {
-        if (!subIndexFileStats.empty())
-        {
-            unsigned numSubFiles = subIndexFileStats.size();
-            for (unsigned i=0; i<numSubFiles; i++)
-            {
-                IDistributedFile *file = queryReadFile(i);
-                if (file)
-                    file->addAttrValue("@numDiskReads", subIndexFileStats[i]->getStatisticSum(StNumDiskReads));
-            }
-        }
-        else
-        {
-            IDistributedFile *file = queryReadFile(0);
-            if (file)
-                file->addAttrValue("@numDiskReads", statsCollection.getStatisticSum(StNumDiskReads));
-        }
+        updateFileReadCostStats(subIndexFileStats);
         CMasterActivity::done();
     }
 };

+ 1 - 1
thorlcr/activities/indexwrite/thindexwrite.cpp

@@ -280,7 +280,7 @@ public:
                 bloom->setProp("@bloomProbability", pval.str());
             }
             container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), 0, false, WUFileStandard, &clusters);
-            props.setPropInt64("@numDiskWrites", statsCollection.getStatisticSum(StNumDiskWrites));
+            updateFileWriteCostStats(*fileDesc, props, statsCollection.getStatisticSum(StNumDiskWrites));
             if (!dlfn.isExternal())
                 queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc);
         }

+ 1 - 7
thorlcr/activities/keyedjoin/thkeyedjoin.cpp

@@ -566,13 +566,7 @@ public:
     }
     virtual void done() override
     {
-        unsigned numSubFiles = fileStats.size();
-        for (unsigned i=0; i<numSubFiles; i++)
-        {
-            IDistributedFile *file = queryReadFile(i);
-            if (file) //publish the number of disk reads for indexes and disk fetches.
-                file->addAttrValue("@numDiskReads", fileStats[i]->getStatisticSum(StNumDiskReads));
-        }
+        updateFileReadCostStats(fileStats);
         CMasterActivity::done();
     }
 };

+ 2 - 17
thorlcr/activities/thdiskbase.cpp

@@ -119,22 +119,7 @@ void CDiskReadMasterBase::serializeSlaveData(MemoryBuffer &dst, unsigned slave)
 
 void CDiskReadMasterBase::done()
 {
-    if (!subFileStats.empty())
-    {
-        unsigned numSubFiles = subFileStats.size();
-        for (unsigned i=0; i<numSubFiles; i++)
-        {
-            IDistributedFile *file = queryReadFile(i);
-            if (file)
-                file->addAttrValue("@numDiskReads", subFileStats[i]->getStatisticSum(StNumDiskReads));
-        }
-    }
-    else
-    {
-        IDistributedFile *file = queryReadFile(0);
-        if (file)
-            file->addAttrValue("@numDiskReads", statsCollection.getStatisticSum(StNumDiskReads));
-    }
+    updateFileReadCostStats(subFileStats);
     CMasterActivity::done();
 }
 
@@ -253,7 +238,7 @@ void CWriteMasterBase::publish()
     }
     if (TDWrestricted & diskHelperBase->getFlags())
         props.setPropBool("restricted", true );
-    props.setPropInt64("@numDiskWrites", statsCollection.getStatisticSum(StNumDiskWrites));
+    updateFileWriteCostStats(*fileDesc, props, statsCollection.getStatisticSum(StNumDiskWrites));
     container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
     if (!dlfn.isExternal())
     {

+ 46 - 0
thorlcr/graph/thgraphmaster.cpp

@@ -528,6 +528,8 @@ void CMasterActivity::deserializeStats(unsigned node, MemoryBuffer &mb)
 void CMasterActivity::getActivityStats(IStatisticGatherer & stats)
 {
     statsCollection.getStats(stats);
+    if (diskAccessCost)
+        stats.addStatistic(StCostFileAccess, diskAccessCost);
 }
 
 void CMasterActivity::getEdgeStats(IStatisticGatherer & stats, unsigned idx)
@@ -547,6 +549,50 @@ void CMasterActivity::done()
     }
 }
 
+void CMasterActivity::updateFileReadCostStats(std::vector<OwnedPtr<CThorStatsCollection>> & subFileStats)
+{
+    if (!subFileStats.empty())
+    {
+        unsigned numSubFiles = subFileStats.size();
+        for (unsigned i=0; i<numSubFiles; i++)
+        {
+            IDistributedFile *file = queryReadFile(i);
+            if (file)
+            {
+                stat_type numDiskReads = subFileStats[i]->getStatisticSum(StNumDiskReads);
+                StringBuffer clusterName;
+                file->getClusterName(0, clusterName);
+                diskAccessCost += money2cost_type(calcFileCost(clusterName, 0, 0, 0, numDiskReads));
+                file->addAttrValue("@numDiskReads", numDiskReads);
+            }
+        }
+    }
+    else
+    {
+        IDistributedFile *file = queryReadFile(0);
+        if (file)
+        {
+            stat_type numDiskReads = statsCollection.getStatisticSum(StNumDiskReads);
+            StringBuffer clusterName;
+            file->getClusterName(0, clusterName);
+            diskAccessCost += money2cost_type(calcFileCost(clusterName, 0, 0, 0, numDiskReads));
+            file->addAttrValue("@numDiskReads", numDiskReads);
+        }
+    }
+}
+
+void CMasterActivity::updateFileWriteCostStats(IFileDescriptor & fileDesc, IPropertyTree &props, stat_type numDiskWrites)
+{
+    if (numDiskWrites)
+    {
+        props.setPropInt64("@numDiskWrites", numDiskWrites);
+        assertex(fileDesc.numClusters()>=1);
+        StringBuffer clusterName;
+        fileDesc.getClusterGroupName(0, clusterName);// Note: calculating for 1st cluster. (Future: calc for >1 clusters)
+        diskAccessCost += money2cost_type(calcFileCost(clusterName, 0, 0, numDiskWrites, 0));
+    }
+}
+
 //////////////////////
 // CMasterGraphElement impl.
 //

+ 3 - 1
thorlcr/graph/thgraphmaster.ipp

@@ -92,7 +92,6 @@ class graphmaster_decl CMasterGraph : public CGraphBase
     bool sentGlobalInit = false;
     CThorStatsCollection graphStats;
 
-
     CReplyCancelHandler activityInitMsgHandler, bcastMsgHandler, executeReplyMsgHandler;
 
     void sendQuery();
@@ -248,9 +247,12 @@ protected:
     std::vector<OwnedPtr<CThorEdgeCollection>> edgeStatsVector;
     CThorStatsCollection statsCollection;
     IBitSet *notedWarnings;
+    stat_type diskAccessCost = 0;
 
     void addReadFile(IDistributedFile *file, bool temp=false);
     IDistributedFile *queryReadFile(unsigned f);
+    void updateFileReadCostStats(std::vector<OwnedPtr<CThorStatsCollection>> & subFileStats);
+    void updateFileWriteCostStats(IFileDescriptor & fileDesc, IPropertyTree &props, stat_type numDiskWrites);
     virtual void process() { }
 public:
     IMPLEMENT_IINTERFACE_USING(CActivityBase)