Explorar el Código

HPCC-18069 Implement an efficient scope filter

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday hace 8 años
padre
commit
4a1ad67f93

+ 5 - 21
common/workunit/workflow.cpp

@@ -63,33 +63,17 @@ EnumMapping wfstates[] =
 
 static void setEnum(IPropertyTree *p, const char *propname, int value, EnumMapping *map)
 {
-    const char *defval = map->str;
-    while (map->str)
-    {
-        if (value==map->val)
-        {
-            p->setProp(propname, map->str);
-            return;
-        }
-        map++;
-    }
-    assertex(!"Unexpected value in setEnum");
-    p->setProp(propname, defval);
+    const char * mapped = getEnumText(value, map, nullptr);
+    if (!mapped)
+        assertex(!"Unexpected value in setEnum");
+    p->setProp(propname, mapped);
 }
 
 static int getEnum(IPropertyTree *p, const char *propname, EnumMapping *map)
 {
     const char *v = p->queryProp(propname);
     if (v)
-    {
-        while (map->str)
-        {
-            if (stricmp(v, map->str)==0)
-                return map->val;
-            map++;
-        }
-        assertex(!"Unexpected value in getEnum");
-    }
+        return getEnum(v, map);
     return 0;
 }
 

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 1177 - 205
common/workunit/workunit.cpp


+ 109 - 2
common/workunit/workunit.hpp

@@ -36,6 +36,7 @@
 #include "jutil.hpp"
 #include "jprop.hpp"
 #include "wuattr.hpp"
+#include <vector>
 
 #define LEGACY_GLOBAL_SCOPE "workunit"
 #define GLOBAL_SCOPE ""
@@ -989,6 +990,8 @@ interface IConstWUStatisticIterator : extends IScmIterator
     virtual IConstWUStatistic & query() = 0;
 };
 
+//---------------------------------------------------------------------------------------------------------------------
+
 /*
  * An interface that is provided as a callback to a scope iterator to report the properties of each scope when iterating scopes
  */
@@ -1002,15 +1005,117 @@ interface IWuScopeVisitor
 /*
  * Interface for an iterator that walks through the different logical elements (scopes) within a workunit
  */
+enum WuPropertyTypes : unsigned
+{
+    PTnone                  = 0x00,
+    PTstatistics            = 0x01,
+    PTattributes            = 0x02,
+    PThints                 = 0x04,
+    PTscope                 = 0x08, // Just the existence of the scope is interesting
+    PTall                   = 0xFF,
+};
+BITMASK_ENUM(WuPropertyTypes);
+
+enum WuScopeSourceFlags : unsigned
+{
+    SSFsearchDefault        = 0x0000,
+    SSFsearchGlobalStats    = 0x0001,
+    SSFsearchGraphStats     = 0x0002,
+    SSFsearchGraph          = 0x0004,
+    SSFsearchExceptions     = 0x0008,
+    SSFsearchAll            = UINT_MAX,
+};
+BITMASK_ENUM(WuScopeSourceFlags);
+
+/* WuScopeFilter syntax:
+ * initial match:   scope[<scope-id>] | stype[<scope-type>] | id[<scope-id>] | depth[<value>| <min>,<max>]
+ *                  source[global|stats|graph|exception]
+ * stats filter:    where[<stat> | <stat>(=|!=|<|>|<=|>=)value | <stat>=<min>..<max>]
+ *
+ * returned scopes: matched[true|false] | nested[<depth>] | include[<scope-type>]
+ * returned information:
+ *                  prop[stat|attr|hint|scope]
+ *                  stat[<stat-name>] | attr[<attr-name>] | hint[<hint-name>] | measure[<measure-name>]
+ */
+class WORKUNIT_API WuScopeFilter
+{
+public:
+    WuScopeFilter() = default;
+    WuScopeFilter(const char * filter);
+
+    void addFilter(const char * filter);
+    void addScope(const char * scope);
+    void addScopeType(const char * scopeType);
+    void addId(const char * id);
+    void setDepth(unsigned low, unsigned high);
+    void setSource(const char * source);
+
+    void setIncludeMatch(bool value);
+    void setIncludeNesting(unsigned depth);
+    void setIncludeScopeType(const char * scopeType);
+
+    void addOutput(const char * prop);              // Which statistics/properties/hints are required.
+    void addOutputProperties(const char * prop);    // stat/attr/hint/scope
+    void addOutputStatistic(const char * prop);
+    void addOutputAttribute(const char * prop);
+    void addOutputHint(const char * prop);
+
+    void finishedFilter(); // Call once filter has been completely set up
+
+    bool includeStatistic(StatisticKind kind) const;
+    bool includeAttribute(WuAttr attr) const;
+    bool includeHint(const char * kind) const;
+    bool includeScope(const char * scope) const;
+
+    ScopeCompare compareMatchScopes(const char * scope) const;
+    const ScopeFilter & queryIterFilter() const;
+    bool isOptimized() const { return optimized; }
+
+protected:
+    void addRequiredStat(const char * filter);
+    bool matchOnly(StatisticScopeType scopeType) const;
+
+    //MORE: Make the following protected/private
+public:
+//The following members control which scopes are matched by the iterator
+    ScopeFilter scopeFilter;                            // Filter that must be matched by a scope
+    std::vector<StatisticValueFilter> requiredStats;    // The attributes that must be present for a particular scope
+    WuScopeSourceFlags sourceFlags = SSFsearchDefault;  // Which sources within the workunit should be included.  Default is to calculate from the properties.
+
+// Once a match has been found which scopes are returned?
+    struct
+    {
+        bool matchedScope = true;
+        unsigned nestedDepth = UINT_MAX;
+        UnsignedArray scopeTypes;
+    } include;
+
+// For all scopes that are returned, what information is required?
+    WuPropertyTypes properties = PTnone;  // What kind of information is desired (can be used to optimize the scopes). Default is scopes (for selected sources)
+    UnsignedArray desiredStats;
+    UnsignedArray desiredAttrs;
+    StringArray desiredHints;
+    StatisticMeasure desiredMeasure = SMeasureAll;
+
+    __uint64 minVersion = 0;
+    bool preFilterScope = false;
+    bool optimized = false;
+    //NB: Optimize scopeFilter.hasSingleMatch() + bail out early
+};
+
 interface IConstWUScopeIterator : extends IScmIterator
 {
+    //Allow iteration of the tree without walking through all the nodes.
+    virtual bool nextSibling() = 0;
+    virtual bool nextParent() = 0;
+
     //These return values are invalid after a call to next() or another call to the same function
     virtual const char * queryScope() const = 0;
     virtual StatisticScopeType getScopeType() const = 0;
 
     //Provide information about all stats, attributes and hints
     //MORE: should allow a mask to indicate which information is reported
-    virtual void playProperties(IWuScopeVisitor & visitor) = 0;
+    virtual void playProperties(WuPropertyTypes whichProperties, IWuScopeVisitor & visitor) = 0;
 
     //Return true if the stat is present, and if found update the value - the queryStat() wrapper is generally easier to use.
     virtual bool getStat(StatisticKind kind, unsigned __int64 & value) const = 0;
@@ -1025,6 +1130,8 @@ interface IConstWUScopeIterator : extends IScmIterator
     }
 };
 
+//---------------------------------------------------------------------------------------------------------------------
+
 //! IWorkUnit
 //! Provides high level access to WorkUnit "header" data.
 interface IWorkUnit;
@@ -1105,7 +1212,7 @@ interface IConstWorkUnit : extends IConstWorkUnitInfo
     virtual IConstWUWebServicesInfo * getWebServicesInfo() const = 0;
     virtual IConstWUStatisticIterator & getStatistics(const IStatisticsFilter * filter) const = 0; // filter must currently stay alive while the iterator does.
     virtual IConstWUStatistic * getStatistic(const char * creator, const char * scope, StatisticKind kind) const = 0;
-    virtual IConstWUScopeIterator & getScopeIterator(const IStatisticsFilter * filter) const = 0; // filter must currently stay alive while the iterator does.
+    virtual IConstWUScopeIterator & getScopeIterator(const WuScopeFilter & filter) const = 0; // filter must currently stay alive while the iterator does.
     virtual IConstWUResult * getVariableByName(const char * name) const = 0;
     virtual IConstWUResultIterator & getVariables() const = 0;
     virtual bool isPausing() const = 0;

+ 1 - 1
common/workunit/workunit.ipp

@@ -303,7 +303,7 @@ public:
     virtual IConstWUResultIterator & getTemporaries() const;
     virtual IConstWUStatisticIterator & getStatistics(const IStatisticsFilter * filter) const;
     virtual IConstWUStatistic * getStatistic(const char * creator, const char * scope, StatisticKind kind) const;
-    virtual IConstWUScopeIterator & getScopeIterator(const IStatisticsFilter * filter) const override;
+    virtual IConstWUScopeIterator & getScopeIterator(const WuScopeFilter & filter) const override;
     virtual IConstWUWebServicesInfo * getWebServicesInfo() const;
     virtual IStringVal & getXmlParams(IStringVal & params, bool hidePasswords) const;
     virtual const IPropertyTree *getXmlParams() const;

+ 1 - 0
common/workunit/wuattr.cpp

@@ -37,6 +37,7 @@ public:
 
 const static WuAttrInfo attrInfo[] = {
     { WANone, SMeasureNone, "none", nullptr, nullptr, nullptr },
+    { WAAll, SMeasureNone, "all", nullptr, nullptr, nullptr },
     CHILD(Kind, SMeasureEnum, "_kind"),
     ATTR(Source, SMeasureText, "@source"),
     ATTR(Target, SMeasureText, "@target"),

+ 2 - 1
common/workunit/wuattr.hpp

@@ -28,9 +28,10 @@
 #endif
 
 //The wuattribute values start from a high value - so that they do not overlap with StXXX
-enum WuAttr
+enum WuAttr : unsigned
 {
     WANone = 0x80000000,
+    WAAll,
     WAKind,
     WASource,
     WATarget,

+ 12 - 18
dali/daliadmin/daliadmin.cpp

@@ -2729,7 +2729,9 @@ public:
     virtual void noteAttribute(WuAttr attr, const char * value)
     {
         StringBuffer xml;
-        xml.appendf("<attr kind='%s' value='%s'/>", queryWuAttributeName(attr), value);
+        xml.appendf("<attr kind='%s' value='", queryWuAttributeName(attr));
+        encodeXML(value, xml, ENCODE_NEWLINES, (unsigned)-1, true);
+        xml.append("'/>");
         printf(" %s\n", xml.str());
     }
     virtual void noteHint(const char * kind, const char * value)
@@ -2740,28 +2742,26 @@ public:
     }
 };
 
-static void dumpWorkunitAttr(IConstWorkUnit * workunit, const StatisticsFilter & filter)
+static void dumpWorkunitAttr(IConstWorkUnit * workunit, const WuScopeFilter & filter)
 {
     ScopeDumper dumper;
 
     printf("<Workunit wuid=\"%s\">\n", workunit->queryWuid());
 
-    Owned<IConstWUScopeIterator> iter = &workunit->getScopeIterator(&filter);
+    Owned<IConstWUScopeIterator> iter = &workunit->getScopeIterator(filter);
     ForEach(*iter)
     {
         printf("<scope scope='%s' type='%s'>\n", iter->queryScope(), queryScopeTypeName(iter->getScopeType()));
-        iter->playProperties(dumper);
+        iter->playProperties(PTall, dumper);
         printf("</scope>\n");
     }
 
     printf("</Workunit>\n");
 }
 
-static void dumpWorkunitAttr(const char *wuid, const char * creatorTypeText, const char * creator, const char * scopeTypeText, const char * scope, const char * kindText, const char * userFilter)
+static void dumpWorkunitAttr(const char *wuid, const char * userFilter)
 {
-    StatisticsFilter filter(checkDash(creatorTypeText), checkDash(creator), checkDash(scopeTypeText), checkDash(scope), NULL, checkDash(kindText));
-    if (userFilter)
-        filter.setFilter(userFilter);
+    WuScopeFilter filter(userFilter);
 
     Owned<IWorkUnitFactory> factory = getWorkUnitFactory();
     const char * star = strchr(wuid, '*');
@@ -3663,17 +3663,11 @@ int main(int argc, char* argv[])
                         migrateFiles(srcGroup, dstGroup, filemask, options);
                     }
                     else if (stricmp(cmd, "wuattr") == 0) {
-                        CHECKPARAMS(1, 6);
-                        if ((params.ordinality() >= 3) && (strchr(params.item(2), '[')))
-                        {
-                            dumpWorkunitAttr(params.item(1), "-", "-", "-", "-", "-", params.item(2));
-                        }
+                        CHECKPARAMS(1, 2);
+                        if (params.ordinality() > 2)
+                            dumpWorkunitAttr(params.item(1), params.item(2));
                         else
-                        {
-                            while (params.ordinality() < 7)
-                                params.append("*");
-                            dumpWorkunitAttr(params.item(1), params.item(2), params.item(3), params.item(4), params.item(5), params.item(6), nullptr);
-                        }
+                            dumpWorkunitAttr(params.item(1), nullptr);
                     }
                     else
                         ERRLOG("Unknown command %s",cmd);

+ 1 - 2
ecl/hql/hqltrans.cpp

@@ -170,8 +170,7 @@ void HqlTransformStats::gatherTransformStats(IStatisticTarget & target, const ch
 {
 #ifdef TRANSFORM_STATS_TIME
     target.addStatistic(SSTcompilestage, scope, StTimeTotalExecute, nullptr, cycle_to_nanosec(totalTime), 1, 0, StatsMergeSum);
-    if ((childTime-recursiveTime) != 0)
-        target.addStatistic(SSTcompilestage, scope, StTimeLocalExecute, nullptr, cycle_to_nanosec(totalTime-(childTime-recursiveTime)), 1, 0, StatsMergeSum);
+    target.addStatistic(SSTcompilestage, scope, StTimeLocalExecute, nullptr, cycle_to_nanosec(totalTime-(childTime-recursiveTime)), 1, 0, StatsMergeSum);
 #endif
 }
 

+ 120 - 0
ecl/regress/testwudetail

@@ -0,0 +1,120 @@
+#!/bin/bash
+wu=$1
+if [ -z $1 ]
+then
+    #sqagg on thor
+    wu=W20170829-172623
+    if [ -z $wu ]
+    then
+        echo WUID not supplied
+        exit
+    fi;
+fi;
+
+function daliadmin {
+    echo ------ daliadmin $1 ------
+    echo $daliadmincmd . wuattr $wu $1
+    $daliadmincmd . wuattr $wu $1
+    echo
+    echo
+}
+
+function xdaliadmin {
+    daliadmin $1
+    donothing=x
+}
+
+daliadmincmd=/home/gavin/buildr/RelWithDebInfo/bin/daliadmin
+#The following should be TimeLocalExecute, but thor needs to start publishing it.
+timeattr=TimeMaxLocalExecute
+
+searchsg='graph1:sg1'
+searchid='sg1'
+searchsg2='graph1:sg30'
+
+#Only the scope lists - filter by the different sources
+xdaliadmin prop[scope],source[all]
+xdaliadmin prop[scope],source[global]
+xdaliadmin prop[scope],source[stats]
+xdaliadmin prop[scope],source[graph]
+xdaliadmin prop[scope],source[exception]
+
+#Filter which attributes are returned
+#All attributes
+xdaliadmin stat[all]
+#Only elapsed time attributes
+xdaliadmin stat[TimeElapsed]
+#Only hints
+xdaliadmin hint[all]
+#Only attributes
+xdaliadmin attr[all]
+
+# Provide a list of top level scopes/activities
+xdaliadmin depth[1],nested[0],source[global]
+
+
+# check extracting attributes at a fixed depth
+xdaliadmin depth[2],nested[0],source[global]
+xdaliadmin depth[2],nested[0],source[stats]
+xdaliadmin depth[2],nested[0],source[graph]
+
+# provide the values for [TimeLocalExecute] for all top level scopes { Top level heat map }
+xdaliadmin depth[1],nested[0],stat[$timeattr]
+
+# Single root global scope (blank)
+xdaliadmin depth[0],nested[0],prop[scope]
+
+# first level of scopes - both forms should be equivalent, but implemented differently
+xdaliadmin depth[1],nested[0],prop[scope]
+xdaliadmin depth[0],nested[1],matched[false],prop[scope]
+
+# second level of scopes - both forms should be equivalent, but implemented differently
+xdaliadmin depth[2],nested[0],prop[scope]
+xdaliadmin depth[1],nested[1],matched[false],prop[scope]
+
+# Provide all the children of element [n] in the global element [n] { Expand subgraphs within a graph }
+xdaliadmin scope[$searchsg],matched[false],nested[1],prop[scope]
+
+# Provide the scope information for a particular activity { To map errors to graph locations }
+xdaliadmin id[$searchid],nested[0],prop[scope]
+
+# Provide an entire hierarchy starting from a particular subgraph. { quick sub-graph view }
+xdaliadmin scope[$searchsg],prop[scope]
+
+# For all activities that read data, return the $timeattr. { A filtered heat map }
+xdaliadmin where[NumMinDiskReads],nested[0],stat[$timeattr],stat[NumMinDiskReads]
+
+# Return children for 2 items - which nest inside each other
+xdaliadmin id[sg30],id[sg33],nested[1],matched[false],prop[scope]
+
+# For all activities in a particular subgraph return all time attributes { A multiple series bar chart }
+xdaliadmin id[sg1],include[activity],matched[false],measure[Time]
+xdaliadmin id[sg30],include[activity],measure[Time]
+
+#Check matches within a range of depths.
+daliadmin include[activity],depth[5,7],nested[0],prop[scope]
+
+# All attributes for all activities within a subgraph (but not within a child subgraph) { a table of attributes for the current subgraph }
+xdaliadmin scope[$searchsg],nested[1],include[activity],matched[false],prop[all]
+
+# For all activities return WhenFirstRow and TimeElapsed { gantt chart of activities within a subgraph }
+xdaliadmin scope[$searchsg],nested[1],include[activity],matched[false],stat[WhenMinFirstRow],stat[TimeMaxLocalExecute]
+
+#MORE: Does the filter apply to the match criteria or the child values?  May also need having?
+xdaliadmin scope[$searchsg2],include[activity],matched[false],where[WhenMinFirstRow],measure[When],stat[WhenMinFirstRow],stat[TimeMaxLocalExecute]
+
+# Update all properties for a subgraph { e.g., for updating graph progress }.  version[1] implies no static values
+xdaliadmin scope[$searchsg],version[1],prop[all]
+
+# Full dump of all statistics - to provide raw material for tables/client side analysis { stats tab, user xml output }
+xdaliadmin prop[stat],version[1]
+
+# Find all activities which spent at least 1ms (1000000ns) processing.
+xdaliadmin stype[activity],nested[0],where[$timeattr],stat[$timeattr]
+xdaliadmin "stype[activity],nested[0],where[$timeattr>=1000000],stat[$timeattr]"
+xdaliadmin stype[activity],nested[0],where[$timeattr=1000000,],stat[$timeattr]
+xdaliadmin stype[activity],nested[0],where[$timeattr=1000000,2000000],stat[$timeattr]
+xdaliadmin stype[activity],nested[0],where[$timeattr=1000us,2ms],stat[$timeattr]
+
+# Find all activities which spent more than 1 minute sorting { anomaly detection }
+xdaliadmin "stype[activity],nested[0],where[TimeSortElapsed>=60s],prop[scope]"

+ 1 - 0
system/jlib/jarray.hpp

@@ -50,6 +50,7 @@ public:
     inline aindex_t length() const                 { return used; } /* Return number of items  */
     inline aindex_t ordinality() const             { return used; } /* Return number of items  */
     inline bool empty() const                    { return (used==0); }
+    inline explicit operator bool() const        { return (used != 0); }
 
 protected:
     void * _doBAdd(void *, size32_t size, StdCompare compare, bool & isNew);

+ 347 - 4
system/jlib/jstats.cpp

@@ -121,6 +121,19 @@ MODULE_INIT(INIT_PRIORITY_STANDARD)
 
 extern jlib_decl int compareScopeName(const char * left, const char * right)
 {
+    if (!*left)
+    {
+        if (!*right)
+            return 0;
+        else
+            return -1;
+    }
+    else
+    {
+        if (!*right)
+            return +1;
+    }
+
     StatsScopeId leftId;
     StatsScopeId rightId;
     for(;;)
@@ -421,8 +434,76 @@ void formatStatistic(StringBuffer & out, unsigned __int64 value, StatisticKind k
 
 //--------------------------------------------------------------------------------------------------------------------
 
-unsigned queryStatisticsDepth(const char * text)
+stat_type readStatisticValue(const char * cur, const char * * end, StatisticMeasure measure)
 {
+    char * next;
+    stat_type value = strtoll(cur, &next, 10);
+
+    switch (measure)
+    {
+    case SMeasureTimeNs:
+        //Allow s, ms and us as scaling suffixes
+        if (next[0] == 's')
+        {
+            value *= 1000000000;
+            next++;
+        }
+        else if ((next[0] == 'm') && (next[1] == 's'))
+        {
+            value *= 1000000;
+            next += 2;
+        }
+        else if ((next[0] == 'u') && (next[1] == 's'))
+        {
+            value *= 1000;
+            next += 2;
+        }
+        break;
+    case SMeasureCount:
+    case SMeasureSize:
+        //Allow K, M, G as scaling suffixes
+        if (next[0] == 'K')
+        {
+            value *= 0x400;
+            next++;
+        }
+        else if (next[0] == 'M')
+        {
+            value *= 0x100000;
+            next++;
+        }
+        else if (next[0] == 'G')
+        {
+            value *= 0x40000000;
+            next++;
+        }
+        //Skip bytes marker
+        if ((*next == 'b') || (*next == 'B'))
+            next++;
+        break;
+    case SMeasurePercent:
+        //MORE: Extend to allow fractional percentages
+        //Allow % to mean a percentage - instead of ppm
+        if (next[0] == '%')
+        {
+            value *= 10000;
+            next++;
+        }
+        break;
+    }
+
+    if (end)
+        *end = next;
+    return value;
+}
+
+//--------------------------------------------------------------------------------------------------------------------
+
+unsigned queryScopeDepth(const char * text)
+{
+    if (!*text)
+        return 0;
+
     unsigned depth = 1;
     for (;;)
     {
@@ -438,6 +519,27 @@ unsigned queryStatisticsDepth(const char * text)
     }
 }
 
+const char * queryScopeTail(const char * scope)
+{
+    const char * colon = strrchr(scope, ':');
+    if (colon)
+        return colon+1;
+    else
+        return scope;
+}
+
+bool getParentScope(StringBuffer & parent, const char * scope)
+{
+    const char * colon = strrchr(scope, ':');
+    if (colon)
+    {
+        parent.append(colon-scope, scope);
+        return true;
+    }
+    else
+        return false;
+}
+
 
 const char * queryMeasurePrefix(StatisticMeasure measure)
 {
@@ -1106,6 +1208,11 @@ unsigned StatsScopeId::getHash() const
     }
 }
 
+bool StatsScopeId::isWildcard() const
+{
+    return (id == 0) && (extra == 0) && !name;
+}
+
 int StatsScopeId::compare(const StatsScopeId & other) const
 {
     if (scopeType != other.scopeType)
@@ -2447,6 +2554,242 @@ CRuntimeStatisticCollection * CNestedSummaryRuntimeStatisticMap::createStats(con
 
 //---------------------------------------------------
 
+void StatsAggregation::noteValue(stat_type value)
+{
+    if (count == 0)
+    {
+        minValue = value;
+        maxValue = value;
+    }
+    else
+    {
+        if (value < minValue)
+            minValue = value;
+        else if (value > maxValue)
+            maxValue = value;
+    }
+
+    count++;
+    sumValue += value;
+}
+
+stat_type StatsAggregation::getAve() const
+{
+    return (sumValue / count);
+}
+
+
+//---------------------------------------------------
+
+ScopeCompare compareScopes(const char * scope, const char * key)
+{
+    byte left = *scope;
+    byte right = *key;
+    //Check for root scope "" compared with anything
+    if (!left)
+    {
+        if (!right)
+            return SCequal;
+        return SCparent;
+    }
+    else if (!right)
+    {
+        return SCchild;
+    }
+
+    bool hadCommonScope = false;
+    for (;;)
+    {
+        if (left != right)
+        {
+            //FUTURE: Extend this function to support skipping numbers to allow wildcard matching
+            if (!left)
+            {
+                if (right == ':')
+                    return SCparent; // scope is a parent (prefix) of the key
+            }
+            if (!right)
+            {
+                if (left == ':')
+                    return SCchild;  // scope is a child (superset) of the key
+            }
+            return hadCommonScope ? SCrelated : SCunrelated;
+        }
+
+        if (!left)
+            return SCequal;
+
+        if (left == ':')
+            hadCommonScope = true;
+
+        left = *++scope;
+        right =*++key;
+    }
+}
+
+ScopeFilter::ScopeFilter(const char * scopeList)
+{
+    //MORE: This currently expands a list of scopes - it should probably be improved
+    scopes.appendList(scopeList, ",");
+}
+
+void ScopeFilter::addScope(const char * scope)
+{
+    if (!scope)
+        return;
+
+    if (streq(scope, "*"))
+    {
+        scopes.kill();
+        minDepth = 0;
+        maxDepth = UINT_MAX;
+        return;
+    }
+
+    dbgassertex(!ids && !scopeTypes); // Illegal to specify scopes and ids or scope Types.
+    unsigned depth = queryScopeDepth(scope);
+    if ((scopes.ordinality() == 0) || (depth < minDepth))
+        minDepth = depth;
+    if ((scopes.ordinality() == 0) || (depth > maxDepth))
+        maxDepth = depth;
+    scopes.append(scope);
+}
+
+void ScopeFilter::addScopes(const char * scope)
+{
+    StringArray list;
+    list.appendList(scope, ",");
+    ForEachItemIn(i, list)
+        addScope(list.item(i));
+}
+
+void ScopeFilter::addScopeType(StatisticScopeType scopeType)
+{
+    if (scopeType == SSTall)
+        return;
+
+    dbgassertex(!scopes && !ids);
+    scopeTypes.append(scopeType);
+}
+
+void ScopeFilter::addId(const char * id)
+{
+    dbgassertex(!scopes && !scopeTypes);
+    ids.append(id);
+}
+
+void ScopeFilter::setDepth(unsigned low, unsigned high)
+{
+    minDepth = low;
+    maxDepth = high;
+}
+
+
+ScopeCompare ScopeFilter::compare(const char * scope) const
+{
+    ScopeCompare result = SCunknown;
+    if (scopes)
+    {
+        //If scopes have been provided, then we are searching for an exact match against that scope
+        ForEachItemIn(i, scopes)
+            result |= compareScopes(scope, scopes.item(i));
+    }
+    else
+    {
+        //How does the depth of the scope compare with the range we are expecting?
+        unsigned depth = queryScopeDepth(scope);
+        if (depth < minDepth)
+            return SCparent;
+        if (depth > maxDepth)
+            return SCchild;
+
+        //Assume it is a match until proven otherwise
+        result |= SCequal;
+        // Could be the child of a match
+        if (depth > minDepth)
+            result |= SCchild;
+        //Could be the parent of a match
+        if (depth < maxDepth)
+            result |= SCparent;
+
+        //Check if the type of the current object matches the type
+        const char * tail = queryScopeTail(scope);
+        if (scopeTypes.ordinality())
+        {
+            StatsScopeId id(tail);
+            if (!scopeTypes.contains(id.queryScopeType()))
+                result &= ~SCequal;
+        }
+
+        if (ids)
+        {
+            if (!ids.contains(tail))
+                result &= ~SCequal;
+        }
+    }
+
+    if (!(result & SCequal))
+        return result;
+
+    //Have a match - now check that the attributes match as required
+    //MORE:
+    if (false)
+    {
+        result &= ~SCequal;
+    }
+
+    return result;
+}
+
+int ScopeFilter::compareDepth(unsigned depth) const
+{
+    if (depth < minDepth)
+        return -1;
+    if (depth > maxDepth)
+        return +1;
+    return 0;
+}
+
+bool ScopeFilter::hasSingleMatch() const
+{
+    return scopes.ordinality() == 1 || ids.ordinality() == 1;
+}
+
+bool ScopeFilter::matchOnly(StatisticScopeType scopeType) const
+{
+    if ((scopeTypes.ordinality() == 1) && (scopeTypes.item(0) == scopeType))
+        return true;
+
+    //Check the types of the scopes that are being searched
+    if (scopes.ordinality())
+    {
+        ForEachItemIn(i, scopes)
+        {
+            const char * scopeId = queryScopeTail(scopes.item(i));
+            StatsScopeId id(scopeId);
+            if (id.queryScopeType() != scopeType)
+                return false;
+        }
+        return true;
+    }
+
+    if (ids.ordinality())
+    {
+        ForEachItemIn(i, ids)
+        {
+            const char * scopeId = ids.item(i);
+            StatsScopeId id(scopeId);
+            if (id.queryScopeType() != scopeType)
+                return false;
+        }
+        return true;
+    }
+
+    return false;
+}
+
+//---------------------------------------------------
+
 bool ScopedItemFilter::matchDepth(unsigned low, unsigned high) const
 {
     if (maxDepth && low && maxDepth < low)
@@ -2476,7 +2819,7 @@ bool ScopedItemFilter::match(const char * search) const
 
         if (minDepth || maxDepth)
         {
-            unsigned searchDepth = queryStatisticsDepth(search);
+            unsigned searchDepth = queryScopeDepth(search);
             if (searchDepth < minDepth)
                 return false;
             if (maxDepth && searchDepth > maxDepth)
@@ -2492,7 +2835,7 @@ bool ScopedItemFilter::recurseChildScopes(const char * curScope) const
     if (maxDepth == 0 || !curScope)
         return true;
 
-    if (queryStatisticsDepth(curScope) >= maxDepth)
+    if (queryScopeDepth(curScope) >= maxDepth)
         return false;
     return true;
 }
@@ -2502,7 +2845,7 @@ void ScopedItemFilter::set(const char * _value)
     if (_value && !streq(_value, "*") )
     {
         value.set(_value);
-        minDepth = queryStatisticsDepth(_value);
+        minDepth = queryScopeDepth(_value);
         if (!strchr(_value, '*'))
         {
             maxDepth = minDepth;

+ 112 - 1
system/jlib/jstats.h

@@ -21,10 +21,12 @@
 
 #include "jlib.hpp"
 #include "jmutex.hpp"
+#include <vector>
 
 #include "jstatcodes.h"
 
-const unsigned __int64 MaxStatisticValue = (unsigned __int64)-1;
+typedef unsigned __int64 stat_type;
+const unsigned __int64 MaxStatisticValue = (unsigned __int64)0-1U;
 const unsigned __int64 AnyStatisticValue = MaxStatisticValue; // Use the maximum value to also represent unknown, since it is unlikely to ever occur.
 
 inline StatisticKind queryStatsVariant(StatisticKind kind) { return (StatisticKind)(kind & ~StKindMask); }
@@ -44,9 +46,14 @@ public:
         : name(_name), scopeType(_scopeType)
     {
     }
+    StatsScopeId(const char * _scope)
+    {
+        setScopeText(_scope);
+    }
 
     StatisticScopeType queryScopeType() const { return scopeType; }
     StringBuffer & getScopeText(StringBuffer & out) const;
+    bool isWildcard() const;
 
     unsigned getHash() const;
     bool matches(const StatsScopeId & other) const;
@@ -147,6 +154,26 @@ protected:
     IStatisticGatherer & gatherer;
 };
 
+
+class jlib_decl StatsAggregation
+{
+public:
+    void noteValue(stat_type value);
+
+    stat_type getCount() const { return count; }
+    stat_type getMin() const { return minValue; }
+    stat_type getMax() const { return maxValue; }
+    stat_type getSum() const { return sumValue; }
+    stat_type getAve() const;
+    //MORE: StDev would require a sum of squares.
+
+protected:
+    stat_type count = 0;
+    stat_type sumValue = 0;
+    stat_type minValue = 0;
+    stat_type maxValue = 0;
+};
+
 //---------------------------------------------------------------------------------------------------------------------
 
 class StatsSubgraphScope : public StatsScopeBlock
@@ -210,6 +237,86 @@ protected:
     bool hasWildcard;
 };
 
+/*
+ * Pairs a statistic kind with an inclusive range of acceptable values.
+ * Note that matches() only tests the value; callers use queryKind() to find out which statistic it applies to.
+ */
+class jlib_decl StatisticValueFilter
+{
+public:
+    StatisticValueFilter(StatisticKind _kind, stat_type _minValue, stat_type _maxValue)
+      : kind(_kind),
+        minValue(_minValue),
+        maxValue(_maxValue)
+    {
+    }
+
+    //The kind of statistic this filter was constructed with.
+    StatisticKind queryKind() const { return kind; }
+
+    //True when minValue <= value <= maxValue (both bounds are inclusive).
+    bool matches(stat_type value) const
+    {
+        if (value < minValue)
+            return false;
+        return !(value > maxValue);
+    }
+
+protected:
+    StatisticKind kind;
+    stat_type minValue;
+    stat_type maxValue;
+};
+
+//These could be template definitions, but that would potentially affect too many classes.
+//MORE: Would it be useful to move this to a common definition point?
+//
+//BITMASK_ENUM(X) defines the operators |, ~, |= and &= for the enumeration X so it can be used as a set of bit flags.
+//Every operand is cast to unsigned explicitly, so none of the operators relies on the implicit enum->integer
+//conversion (which scoped enumerations do not provide).
+//No binary operator & is defined: for an unscoped enumeration "a & b" still yields the promoted integer type.
+#define BITMASK_ENUM(X) \
+inline constexpr X operator | (X l, X r) { return (X)((unsigned)l | (unsigned)r); } \
+inline constexpr X operator ~ (X l) { return (X)(~(unsigned)l); } \
+inline X & operator |= (X & l, X r) { l = l | r; return l; } \
+inline X & operator &= (X & l, X r) { l = (X)((unsigned)l & (unsigned)r); return l; }
+
+//Bit flags describing how one scope relates to another; BITMASK_ENUM supplies the operators for combining them.
+enum ScopeCompare : unsigned
+{
+    SCunknown   = 0,        // no flags set
+    SCparent    = 1U << 0,  // is a parent of: w1, w1:g1
+    SCchild     = 1U << 1,  // is a child of: w1:g1, w1
+    SCequal     = 1U << 2,  // w1:g1, w1:g1 - may extend to wildcards later.
+    SCrelated   = 1U << 3,  // w1:g1, w1:g2 - some shared relationship
+    SCunrelated = 1U << 4,  // no connection
+};
+BITMASK_ENUM(ScopeCompare);
+
+
+/*
+ * Compare two scopes and return a ScopeCompare value indicating their relationship.
+ */
+
+extern jlib_decl ScopeCompare compareScopes(const char * scope, const char * key);
+
+/*
+ * A filter over scopes.  It stores lists of scope names, scope types and ids together with a depth range;
+ * the depth range defaults to 0..UINT_MAX.  Only the single-argument setDepth() and queryScopes() are defined
+ * inline - everything else is implemented elsewhere.
+ */
+class jlib_decl ScopeFilter
+{
+public:
+    ScopeFilter() = default;
+    ScopeFilter(const char * scopeList);
+
+    void addScope(const char * scope);
+    void addScopes(const char * scope);
+    void addScopeType(StatisticScopeType scopeType);
+    void addId(const char * id);
+    void setDepth(unsigned low, unsigned high);
+    //Shorthand for a single depth: forwards to setDepth(value, value).
+    void setDepth(unsigned value)
+    {
+        setDepth(value, value);
+    }
+
+    /*
+     * Return a mask containing information about whether the scope will match the filter
+     * It errs on the side of false positives - e.g. SCparent is set if it might be the parent of a match
+     */
+    ScopeCompare compare(const char * scope) const;
+
+    int compareDepth(unsigned depth) const; // -1 too shallow, 0 a match, +1 too deep
+    bool hasSingleMatch() const;
+    //Read-only access to the stored list of scope names.
+    const StringArray & queryScopes() const
+    {
+        return scopes;
+    }
+    bool matchOnly(StatisticScopeType scopeType) const;
+
+protected:
+    UnsignedArray scopeTypes;
+    StringArray scopes;
+    StringArray ids;
+    unsigned minDepth{0};
+    unsigned maxDepth{UINT_MAX};
+};
+
+
 class jlib_decl StatisticsFilter : public CInterfaceOf<IStatisticsFilter>
 {
 public:
@@ -571,6 +678,7 @@ extern jlib_decl unsigned __int64 getIPV4StatsValue(const IpAddress & ip);
 extern jlib_decl void formatStatistic(StringBuffer & out, unsigned __int64 value, StatisticMeasure measure);
 extern jlib_decl void formatStatistic(StringBuffer & out, unsigned __int64 value, StatisticKind kind);
 extern jlib_decl void formatTimeStampAsLocalTime(StringBuffer & out, unsigned __int64 value);
+extern jlib_decl stat_type readStatisticValue(const char * cur, const char * * end, StatisticMeasure measure);
 
 extern jlib_decl unsigned __int64 mergeStatistic(StatisticMeasure measure, unsigned __int64 value, unsigned __int64 otherValue);
 extern jlib_decl unsigned __int64 mergeStatisticValue(unsigned __int64 prevValue, unsigned __int64 newValue, StatsMergeAction mergeAction);
@@ -612,6 +720,9 @@ extern jlib_decl unsigned __int64 extractTimeCollatable(const char *s, bool nano
 //activities are in numeric order
 //edges must come before activities.
 extern jlib_decl int compareScopeName(const char * left, const char * right);
+extern jlib_decl unsigned queryScopeDepth(const char * text);
+extern jlib_decl const char * queryScopeTail(const char * scope);
+extern jlib_decl bool getParentScope(StringBuffer & parent, const char * scope);
 
 //This interface is primarily here to reduce the dependency between the different components.
 interface IStatisticTarget

+ 1 - 1
system/jlib/jstring.cpp

@@ -2463,7 +2463,7 @@ bool endsWithIgnoreCase(const char* src, const char* dst)
     return false;
 }
 
-unsigned matchString(const char * search, const char * * strings)
+unsigned matchString(const char * search, const char * const * strings)
 {
     for (unsigned i=0;;i++)
     {

+ 1 - 1
system/jlib/jstring.hpp

@@ -579,7 +579,7 @@ inline bool hasPrefix(const char * text, const char * prefix, bool caseSensitive
 }
 
 // Search for a string in a null terminated array of const char * strings
-extern jlib_decl unsigned matchString(const char * search, const char * * strings);
+extern jlib_decl unsigned matchString(const char * search, const char * const * strings);
 
 extern jlib_decl char *j_strtok_r(char *str, const char *delim, char **saveptr);
 extern jlib_decl int j_memicmp (const void *s1, const void *s2, size32_t len); 

+ 14 - 3
tools/wutool/wutool.cpp

@@ -1986,8 +1986,8 @@ protected:
 
     void testWuDetails(IConstWorkUnit * wu)
     {
-        const StatisticsFilter filter;
-        Owned<IConstWUScopeIterator> iter = &wu->getScopeIterator(&filter);
+        const WuScopeFilter filter;
+        Owned<IConstWUScopeIterator> iter = &wu->getScopeIterator(filter);
         DBGLOG("%s %s", wu->queryWuid(), wu->queryClusterName());
         AttributeScopeVisitor visitor;
         StringBuffer prevScope;
@@ -2000,12 +2000,23 @@ protected:
                 ASSERT(compareScopeName(prevScope.str(), scope) < 0);
             prevScope.set(scope);
 
-            iter->playProperties(visitor);
+            iter->playProperties(PTall, visitor);
         }
     }
 
     void testWuDetails()
     {
+        /*
+         * The following are the tests that need to be done on the filters
+         * o Source
+         * o Scope type
+         * o Set of scope types (activity, edges)
+         * o Depth
+         * o Single scope
+         * o Set of scopes
+         * o Set of ids
+         * They should all be selectable via a text filter.
+         */
 #if 0
         WUSortField filterByJob[] = { WUSFjob, WUSFterm };
         CCycleTimer timer;