Преглед изворни кода

HPCC-15751 Use getLikelihood of filter condition to decide whether to spill

Signed-off-by: Shamser Ahmed <shamser.ahmed@lexisnexis.co.uk>
Shamser Ahmed пре 9 година
родитељ
комит
2ee048c58a

+ 23 - 0
ecl/hql/hqlattr.cpp

@@ -3868,6 +3868,29 @@ double queryLikelihood(IHqlExpression * expr)
     return likelihoodExpr->queryValue()->getRealValue();
 }
 
+// Computes the overall likelihood of a no_filter expression by multiplying
+// together the likelihoods of its individual conditions.
+// Child 0 is skipped and attribute children are ignored.  If any condition has
+// no known likelihood, the result is the "unknown" marker (as tested by
+// isKnownLikelihood()).  With no conditions at all the result is 1.0.
+double queryActivityLikelihood(IHqlExpression * expr)
+{
+    assertex(expr->getOperator() == no_filter);
+
+    double combined = 1.0;
+    ForEachChildFrom(pos, expr, 1)
+    {
+        IHqlExpression * condition = expr->queryChild(pos);
+        if (!condition->isAttribute())
+        {
+            const double conditionLikelihood = queryLikelihood(condition);
+            if (!isKnownLikelihood(conditionLikelihood))
+            {
+                // A single unknown condition makes the whole estimate unknown.
+                setUnknownLikelihood(combined);
+                return combined;
+            }
+            // N.B. multiplying is only valid if the conditions are independent.
+            combined *= conditionLikelihood;
+        }
+    }
+    return combined;
+}
 //---------------------------------------------------------------------------------------------------------------------
 
 IInterface * CHqlExpression::queryExistingProperty(ExprPropKind propKind) const

+ 5 - 1
ecl/hql/hqlattr.hpp

@@ -71,7 +71,10 @@ inline bool hasOutOfLineRows(ITypeInfo * type) { return (hasOutOfLineModifier(ty
 inline bool hasLinkCountedModifier(IHqlExpression * expr)    { return hasLinkCountedModifier(expr->queryType()); }
 inline bool hasStreamedModifier(ITypeInfo * t)   { return queryAttribute(t, streamedAtom) != NULL; }
 inline bool isStreamed(IHqlExpression * expr) { return hasStreamedModifier(expr->queryType()); }
-inline bool isKnownLikelihood(double p) { return p >= 0; }
+
+const static double unknownLikelihood = - 1.0;   // Sentinel value marking a likelihood as "not known".
+inline bool isKnownLikelihood(double p) { return p != unknownLikelihood; }   // True unless p is exactly the sentinel. NOTE(review): any other negative value (or NaN) is treated as known - confirm callers never produce one.
+inline void setUnknownLikelihood(double &p) { p = unknownLikelihood;}   // Overwrites p with the sentinel.
 
 extern HQL_API ITypeInfo * setLinkCountedAttr(ITypeInfo * _type, bool setValue);
 extern HQL_API ITypeInfo * setStreamedAttr(ITypeInfo * _type, bool setValue);
@@ -82,6 +85,7 @@ extern HQL_API IHqlExpression * getRecordCountInfo(IHqlExpression * expr);
 extern HQL_API bool hasNoMoreRowsThan(IHqlExpression * expr, __int64 limit);
 extern HQL_API bool spillToWorkunitNotFile(IHqlExpression * expr, ClusterType platform);
 extern HQL_API double queryLikelihood(IHqlExpression * expr);
+extern HQL_API double queryActivityLikelihood(IHqlExpression * expr);
 extern HQL_API IHqlExpression * queryFixedRowCount(IHqlExpression * expr);
 
 class CHqlMetaProperty;

+ 54 - 7
ecl/hqlcpp/hqlresource.cpp

@@ -3081,6 +3081,7 @@ bool ResourcerInfo::expandRatherThanSpill(bool noteOtherSpills)
             return (info->queryTransformed() == NULL);
     }
     bool isFiltered = false;
+    double filterLikelihood = 1.0;
     bool isProcessed = false;
     loop
     {
@@ -3098,7 +3099,7 @@ bool ResourcerInfo::expandRatherThanSpill(bool noteOtherSpills)
                 //This is only executed for hthor/thor.  Roxie has used expandRatherThanSplit().
                 //We need to balance the saving from reading reduced data in the other branches with the cost of
                 //writing the spill file to disk.
-                if (isFiltered && (numExternalUses >= options->filteredSpillThreshold))
+                if (isFiltered && numExternalUses >= options->filteredSpillThreshold)
                     return false;
                 IHqlExpression * mode = expr->queryChild(2);
                 switch (mode->getOperator())
@@ -3106,10 +3107,37 @@ bool ResourcerInfo::expandRatherThanSpill(bool noteOtherSpills)
                 case no_thor: case no_flat:
                     //MORE: The following is possibly better - but roxie should be able to read from non spill data files in child queries fine
                     //if ((options->targetClusterType == RoxieCluster) && linkedFromChild)) return false;
-                    return true;
+                    break;
                 default:
                     return false;
                 }
+                if (isFiltered)
+                {
+                    if (isKnownLikelihood(filterLikelihood))
+                    {
+                        // Calculation of when to spill/not spill:
+                        //    Where :
+                        //      r = cost(read), w = cost(write), f = cost(filter),
+                        //      n = number uses, p = probability of filter(likelihood)
+                        //
+                        //    Cost of using spill files:
+                        //      = r + f + pw + npr
+                        //      = r + rp + npr          (assuming w~=r and f~=0)
+                        //
+                        //    Cost of not spilling (expanding)
+                        //     = n(r+f)
+                        //     = nr                     (assuming f~=0)
+                        //
+                        // Spill when "cost of using spill files" < "cost of not spilling (expanding)"
+                        //     r + rp + npr < nr
+                        // Which simplifies to :
+                        //     p < (n - 1) / (n +1)
+                        if (filterLikelihood < (double)(numUses-1)/(numUses+1))
+                            return false;
+                        return true;
+                    }
+                }
+                return true;
             }
         case no_stepped:
             return true;
@@ -3192,9 +3220,22 @@ bool ResourcerInfo::expandRatherThanSpill(bool noteOtherSpills)
             expr = expr->queryChild(0);
             break;
         case no_filter:
-            isFiltered = true;
-            expr = expr->queryChild(0);
-            break;
+            {
+                if (isKnownLikelihood(filterLikelihood))
+                {
+                    double likelihood = queryActivityLikelihood(expr);
+                    if (isKnownLikelihood(likelihood))
+                        // Combine the likelihoods of the 2 filter conditions
+                        // N.B. this only works if the filter probabilities are independent
+                        filterLikelihood *= likelihood;
+                    else
+                        // One of the filter probabilities is unknown, so the overall probability is unknown
+                        setUnknownLikelihood(filterLikelihood);
+                }
+                isFiltered = true;
+                expr = expr->queryChild(0);
+                break;
+            }
         case no_select:
             {
                 if (options->targetClusterType == RoxieCluster)
@@ -3219,8 +3260,14 @@ bool ResourcerInfo::expandRatherThanSpill(bool noteOtherSpills)
                     return (info->queryTransformed() == NULL);
                 if (info->numExternalUses)
                 {
-                    if (isFiltered && (numExternalUses >= options->filteredSpillThreshold))
-                        return false;
+                    if (isFiltered)
+                    {
+                        if (numExternalUses >= options->filteredSpillThreshold)
+                            return false;
+                        if (isKnownLikelihood(filterLikelihood) &&
+                            (filterLikelihood < (double)(numUses-1)/(numUses+1)))
+                            return false;
+                    }
                     return true;
                 }
             }

+ 25 - 2
ecl/hqlcpp/hqlsource.cpp

@@ -1819,19 +1819,31 @@ ABoundActivity * SourceBuilder::buildActivity(BuildCtx & ctx, IHqlExpression * e
     StringBuffer graphLabel;
     graphLabel.append(getActivityText(activityKind));
 
+    bool isFiltered = false;
+    double filterLikelihood = 1.0;
     if ((activityKind == TAKdiskread) || (activityKind == TAKcsvread) || (activityKind == TAKxmlread) || (activityKind == TAKjsonread))
     {
         graphLabel.clear();
         if (expr != tableExpr)
         {
             IHqlExpression * cur = expr;
-            bool isFiltered = false;
             bool isProjected = false;
             loop
             {
                 switch (cur->getOperator())
                 {
                 case no_filter:
+                    if (isKnownLikelihood(filterLikelihood))
+                    {
+                        double likelihood = queryActivityLikelihood(cur);
+                        if (isKnownLikelihood(likelihood))
+                            // Combine the likelihoods of the 2 filter conditions
+                            // N.B. this only works if the filter probabilities are independent
+                            filterLikelihood *= likelihood;
+                        else
+                            // One of the filter probabilities is unknown, so the overall probability is unknown
+                            setUnknownLikelihood(filterLikelihood);
+                    }
                     isFiltered = true;
                     break;
                 case no_hqlproject:
@@ -1851,7 +1863,9 @@ ABoundActivity * SourceBuilder::buildActivity(BuildCtx & ctx, IHqlExpression * e
             }
 
             if (isFiltered)
+            {
                 graphLabel.append("Filtered\n");
+            }
             if (isProjected)
                 graphLabel.append("Projected\n");
         }
@@ -1999,7 +2013,16 @@ ABoundActivity * SourceBuilder::buildActivity(BuildCtx & ctx, IHqlExpression * e
         instance->addAttributeBool("_isTransformSpill", isSpill);
     else
         instance->addAttributeBool("_isSpill", isSpill);
-
+    if (isFiltered)
+    {
+        if (isKnownLikelihood(filterLikelihood))
+        {
+            StringBuffer text;
+            filterLikelihood *= 100;
+            text.setf("%3.2f%%", filterLikelihood);
+            instance->addAttribute("matchLikelihood", text);
+        }
+    }
     IHqlExpression * spillReason = tableExpr ? queryAttributeChild(tableExpr, _spillReason_Atom, 0) : NULL;
 
     if (spillReason && !translator.queryOptions().obfuscateOutput)

+ 1 - 1
ecl/regress/likely.ecl

@@ -19,7 +19,7 @@ person := dataset('person', { unsigned8 person_id, string1 per_sex, string40 per
 
 filtered1 := person( LIKELY(xpos < 1000) );
 
-filter2 := filtered1( LIKELY(per_last_name = 'Hawthorn', 0.5) ) ;
+filter2 := filtered1( LIKELY(person_id > 1000, 0.01) ) ;
 filter3 := filtered1( LIKELY(per_last_name = 'Hawthorn', 0.99) ) ;
 filter4 := filtered1( UNLIKELY(per_last_name != 'Drimbad' AND per_sex = 'F' ) );
 filter5 := filtered1( UNLIKELY(per_last_name = 'Drimbad') );

+ 156 - 0
testing/regress/ecl/key/likely.xml

@@ -0,0 +1,156 @@
+<Dataset name='Result 1'>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+ <Row><personid>1</personid><firstname>TIMTOHY        </firstname><lastname>PRUNER                   </lastname><age>24</age><city>ABBEVILE                 </city></Row>
+</Dataset>
+<Dataset name='Result 12'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+</Dataset>
+<Dataset name='Result 13'>
+ <Row><personid>2</personid><firstname>ALCIAN         </firstname><lastname>STRACK                   </lastname><age>33</age><city>AGOURA HILLS             </city></Row>
+ <Row><personid>3</personid><firstname>CHAMENE        </firstname><lastname>TRAYLOR                  </lastname><age>44</age><city>ABELL                    </city></Row>
+ <Row><personid>4</personid><firstname>HATIM          </firstname><lastname>BARENT                   </lastname><age>62</age><city>AIEA                     </city></Row>
+ <Row><personid>10</personid><firstname>MINGWAI        </firstname><lastname>MINTE                    </lastname><age>35</age><city>ABILENE                  </city></Row>
+ <Row><personid>5</personid><firstname>MIRCHINE       </firstname><lastname>CECCHETTI                </lastname><age>22</age><city>ANDOVER                  </city></Row>
+ <Row><personid>9</personid><firstname>NAATALIE       </firstname><lastname>VANSPANJE                </lastname><age>29</age><city>ANDALE                   </city></Row>
+ <Row><personid>7</personid><firstname>NAIR           </firstname><lastname>KREDA                    </lastname><age>29</age><city>ANDREWS                  </city></Row>
+ <Row><personid>6</personid><firstname>RENORDA        </firstname><lastname>HEERMANN                 </lastname><age>19</age><city>ABERDEEN                 </city></Row>
+ <Row><personid>11</personid><firstname>RITUA          </firstname><lastname>CORLETO                  </lastname><age>42</age><city>ANCHORAGE                </city></Row>
+ <Row><personid>8</personid><firstname>RYLE           </firstname><lastname>REDMAN                   </lastname><age>61</age><city>AKROW                    </city></Row>
+</Dataset>

+ 108 - 0
testing/regress/ecl/likely.ecl

@@ -0,0 +1,108 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2016 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//nohthor
+//noroxie
+
+// Logical file name shared by SetupPeople (which writes it) and PeopleDS1 (which reads it back).
+PeopleFileName := 'regress::thor::people';
+
+// GENERATE People Dataset
+// Record layout of one person row; used by both the inline dataset ds0 and the
+// file-backed dataset PeopleDS1.
+Layout_Person := RECORD
+  UNSIGNED1 PersonID;
+  STRING15  FirstName;
+  STRING25  LastName;
+  UNSIGNED  Age;
+  STRING25  City;
+END;
+
+// Eleven inline test rows (PersonID 1..11) in Layout_Person order:
+// PersonID, FirstName, LastName, Age, City.
+// The expected-output datasets for this test reproduce these strings verbatim, so the
+// spellings (including the odd-looking ones) must not be changed here in isolation.
+ds0 := DATASET([ {  1, 'TIMTOHY',  'PRUNER',    24, 'ABBEVILE'},
+                 {  2, 'ALCIAN',   'STRACK',    33, 'AGOURA HILLS'},
+                 {  3, 'CHAMENE',  'TRAYLOR',   44, 'ABELL'},
+                 {  4, 'HATIM',    'BARENT',    62, 'AIEA'},
+                 {  5, 'MIRCHINE', 'CECCHETTI', 22, 'ANDOVER'},
+                 {  6, 'RENORDA',  'HEERMANN',  19, 'ABERDEEN'},
+                 {  7, 'NAIR',     'KREDA',     29, 'ANDREWS'},
+                 {  8, 'RYLE',     'REDMAN',    61, 'AKROW'},
+                 {  9, 'NAATALIE', 'VANSPANJE', 29, 'ANDALE'},
+                 { 10, 'MINGWAI',  'MINTE',     35, 'ABILENE'},
+                 { 11, 'RITUA',    'CORLETO',   42, 'ANCHORAGE'} ], Layout_Person);
+
+// Action that writes ds0 to the logical file PeopleFileName; OVERWRITE allows the
+// test to be re-run when the file already exists.
+SetupPeople := OUTPUT(ds0,,PeopleFileName, OVERWRITE);
+
+// Test Likely/Unlikely
+// Reads back the file written by SetupPeople; every filter chain below starts from
+// this dataset.
+PeopleDS1 := dataset(PeopleFileName, Layout_Person, thor);
+
+// Spill
+// Case 0: every filter condition is annotated with a likelihood of 0.01.
+// filter0 is shared by two branches (filter0_1 and filter0_2); each branch applies one
+// further filter and then sorts (by LastName and by FirstName respectively).
+// All conditions are in fact true for every row of ds0, so only the LIKELY annotation
+// distinguishes this case from the others.
+// NOTE(review): "Spill" presumably records the expected code-generator decision for the
+// shared filter0 - confirm against the generated graph.
+filter0 := PeopleDS1( LIKELY(FirstName <> '', 0.01) );
+filter0_1 := filter0( LIKELY(City <>'', 0.01) );
+filter0_2 := filter0( LIKELY(Age < 99, 0.01));
+filter0_1_1 := sort(filter0_1( LIKELY(LastName<>'xxx', 0.01)), LastName );
+filter0_2_1 := sort(filter0_2( LIKELY(LastName<>'yyy', 0.01)), FirstName );
+
+// Expand rather than spill
+// Case 1: same shape and conditions as case 0 (a shared first filter, two branches,
+// one further filter and a sort per branch), but every likelihood is 0.99.
+// NOTE(review): the heading presumably means the shared filter1 is expected to be
+// re-evaluated per branch instead of being spilled - confirm against the generated graph.
+filter1 := PeopleDS1( LIKELY(FirstName <> '', 0.99) );
+filter1_1 := filter1( LIKELY(City <>'', 0.99) );
+filter1_2 := filter1( LIKELY(Age < 99, 0.99));
+filter1_1_1 := sort(filter1_1( LIKELY(LastName<>'xxx', 0.99)), LastName );
+filter1_2_1 := sort(filter1_2( LIKELY(LastName<>'yyy', 0.99)), FirstName );
+
+// Spill
+// Case 2: mixed likelihoods - 0.9 on the first filter, 0.4 on the second level and
+// 0.1 on the third level.
+// Sharing is two levels deep: filter2 feeds filter2_1 and filter2_2, and each of those
+// feeds two sorted third-level filters (sorted by LastName under filter2_1, by FirstName
+// under filter2_2).
+filter2 := PeopleDS1( LIKELY(FirstName <> '', 0.9) );
+filter2_1 := filter2( LIKELY(City <>'', 0.4) );
+filter2_2 := filter2( LIKELY(Age < 99, 0.4));
+filter2_1_1 := sort(filter2_1( LIKELY(LastName<>'xxx', 0.1)), LastName );
+filter2_1_2 := sort(filter2_1( LIKELY(LastName<>'abc', 0.1)), LastName );
+filter2_2_1 := sort(filter2_2( LIKELY(LastName<>'yyy', 0.1)), FirstName );
+filter2_2_2 := sort(filter2_2( LIKELY(LastName<>'def', 0.1)), FirstName );
+
+// Expand rather than spill
+// Case 3: the first three levels repeat case 2's conditions and likelihoods, except that
+// the first filter also carries HINT(gofaster(true)). Two more filter levels are then
+// stacked on filter3_2_2; filter3_2_2_1 carries HINT(maxnumber(110)).
+// "Age <> 24" is the only condition in this file that removes a row (PersonID 1 has Age 24).
+// NOTE(review): why the hint changes the expected outcome from "spill" to "expand" is not
+// visible in this file - confirm against the code generator.
+// NOTE(review): filter3_1_1, filter3_1_2 and filter3_2_2_2 are defined here but are not
+// referenced by the SEQUENTIAL action in this file - confirm that is intentional.
+filter3 := PeopleDS1( LIKELY(FirstName <> '', 0.9), HINT(gofaster(true)) );
+filter3_1 := filter3( LIKELY(City <>'', 0.4) );
+filter3_2 := filter3( LIKELY(Age < 99, 0.4));
+filter3_1_1 := sort(filter3_1( LIKELY(LastName<>'xxx', 0.1)), LastName );
+filter3_1_2 := sort(filter3_1( LIKELY(LastName<>'abc', 0.1)), LastName );
+filter3_2_1 := sort(filter3_2( LIKELY(LastName<>'yyy', 0.1)), FirstName );
+filter3_2_2 := sort(filter3_2( LIKELY(LastName<>'def', 0.1)), FirstName );
+filter3_2_2_1 := filter3_2_2( LIKELY(Age <> 24,0.1234), HINT(maxnumber(110)) );
+filter3_2_2_2 := filter3_2_2( LIKELY(Age <> 4,0.01) );
+filter3_2_2_1_1 := filter3_2_2_1( LIKELY(PersonId <> 34,0.01) );
+filter3_2_2_1_2 := filter3_2_2_1( LIKELY(PersonId <> 23,0.01) );
+
+// Test driver: write the people file first, then run each case as its own PARALLEL
+// group so that the outputs sharing a filter chain are evaluated together.
+// The order of the OUTPUT actions determines the result numbering in the expected output,
+// so it must not be changed without regenerating the key file.
+SEQUENTIAL(
+  SetupPeople,
+  // Case 0: all likelihoods 0.01
+  PARALLEL(
+    OUTPUT(filter0_1_1);
+    OUTPUT(filter0_2_1);
+  ),
+  // Case 1: all likelihoods 0.99
+  PARALLEL(
+    OUTPUT(filter1_1_1);
+    OUTPUT(filter1_2_1);
+  ),
+  // Case 2: likelihoods 0.9 / 0.4 / 0.1
+  PARALLEL(
+    OUTPUT(filter2_1_1);
+    OUTPUT(filter2_1_2);
+    OUTPUT(filter2_2_1);
+    OUTPUT(filter2_2_2);
+  ),
+  // Case 3: as case 2 plus hints and two further filter levels
+  PARALLEL(
+    OUTPUT(filter3_2_1);
+    OUTPUT(filter3_2_2);
+    OUTPUT(filter3_2_2_1_1);
+    OUTPUT(filter3_2_2_1_2);
+  )
+);
+