Browse Source

HPCC-9589 Introduce pseudo tables flags for xml/nlp parse

These pseudo tables can be used instead of the current flags in the context.
However, because the hoisting of table invariants currently ignores counters,
the switch away from the flags cannot be made yet.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 12 years ago
parent
commit
f3e2515590

+ 4 - 2
ecl/hql/hqlatoms.cpp

@@ -231,7 +231,6 @@ IAtom * lzwAtom;
 IAtom * macroAtom;
 IAtom * manyAtom;
 IAtom * markerAtom;
-IAtom * matchxxxPseudoFileAtom;
 IAtom * maxAtom;
 IAtom * maxCountAtom;
 IAtom * maxLengthAtom;
@@ -250,6 +249,7 @@ IAtom * namedAtom;
 IAtom * namespaceAtom;
 IAtom * newAtom;
 IAtom * newSetAtom;
+IAtom * _nlpParse_Atom;
 IAtom * noBoundCheckAtom;
 IAtom * noCaseAtom;
 IAtom * _noHoist_Atom;
@@ -410,6 +410,7 @@ IAtom * workunitAtom;
 IAtom * wuidAtom;
 IAtom * xmlAtom;
 IAtom * xmlDefaultAtom;
+IAtom * _xmlParse_Atom;
 IAtom * xpathAtom;
 
 #define MAKEID(x)   x##Id = createIdAtom(#x)
@@ -634,7 +635,6 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(macro);
     MAKEATOM(many);
     MAKEATOM(marker);
-    MAKEATOM(matchxxxPseudoFile);
     MAKEATOM(max);
     MAKEATOM(maxCount);
     MAKEATOM(maxLength);
@@ -653,6 +653,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(namespace);
     MAKEATOM(new);
     MAKEATOM(newSet);
+    MAKESYSATOM(nlpParse);
     MAKEATOM(noBoundCheck);
     MAKEATOM(noCase);
     MAKESYSATOM(noHoist);
@@ -812,6 +813,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(wuid);
     MAKEATOM(xml);
     MAKEATOM(xmlDefault);
+    MAKESYSATOM(xmlParse);
     MAKEATOM(xpath);
 
     return true;

+ 2 - 1
ecl/hql/hqlatoms.hpp

@@ -234,7 +234,6 @@ extern HQL_API IAtom * lzwAtom;
 extern HQL_API IAtom * macroAtom;
 extern HQL_API IAtom * manyAtom;
 extern HQL_API IAtom * markerAtom;
-extern HQL_API IAtom * matchxxxPseudoFileAtom;
 extern HQL_API IAtom * maxAtom;
 extern HQL_API IAtom * maxCountAtom;
 extern HQL_API IAtom * maxLengthAtom;
@@ -253,6 +252,7 @@ extern HQL_API IAtom * namedAtom;
 extern HQL_API IAtom * namespaceAtom;
 extern HQL_API IAtom * newAtom;
 extern HQL_API IAtom * newSetAtom;
+extern HQL_API IAtom * _nlpParse_Atom;
 extern HQL_API IAtom * noBoundCheckAtom;
 extern HQL_API IAtom * noCaseAtom;
 extern HQL_API IAtom * _noHoist_Atom;
@@ -414,6 +414,7 @@ extern HQL_API IAtom * workunitAtom;
 extern HQL_API IAtom * wuidAtom;
 extern HQL_API IAtom * xmlAtom;
 extern HQL_API IAtom * xmlDefaultAtom;
+extern HQL_API IAtom * _xmlParse_Atom;
 extern HQL_API IAtom * xpathAtom;
 
 inline bool isInternalAttributeName(IAtom * name) { return (name->str()[0] == '$'); }

+ 5 - 1
ecl/hql/hqlattr.cpp

@@ -3424,7 +3424,11 @@ IHqlExpression * HqlLocationIndependentNormalizer::doCreateTransformed(IHqlExpre
         {
             //Original attributes cause chaos => remove all children from attributes
             if (expr->numChildren() != 0)
-                return createAttribute(expr->queryName());
+            {
+                IAtom * name = expr->queryName();
+                if (name != _countProject_Atom)
+                    return createAttribute(expr->queryName());
+            }
             return LINK(expr);
         }
     case no_field:

+ 34 - 13
ecl/hql/hqlexpr.cpp

@@ -71,8 +71,8 @@
 //#define DEBUG_SCOPE
 //#define CHECK_RECORD_CONSISTENCY
 //#define PARANOID
-//#define SEARCH_NAME1   "vL6R"
-//#define SEARCH_NAME2   "v19"
+//#define SEARCH_NAME1   "v1"
+//#define SEARCH_NAME2   "v2"
 //#define SEARCH_IEXPR 0x0681cb0
 //#define CHECK_SELSEQ_CONSISTENCY
 //#define GATHER_COMMON_STATS
@@ -4816,8 +4816,13 @@ void CUsedTablesBuilder::cleanupProduction()
 {
     ForEachItemInRev(i, inScopeTables)
     {
-        if (inScopeTables.item(i).getOperator() == no_matchattr)
+        switch (inScopeTables.item(i).getOperator())
+        {
+        case no_matchattr:
+        case no_matchrow:
             inScopeTables.remove(i);
+            break;
+        }
     }
 }
 
@@ -4970,6 +4975,18 @@ void CHqlExpressionWithTables::cacheTablesProcessChildScope(CUsedTablesBuilder &
             used.removeActive(left);
             used.removeParent(ds);
             used.removeRows(this, left, NULL);
+            switch (op)
+            {
+            case no_parse:
+            case no_newparse:
+                used.cleanupProduction();
+                used.removeActive(queryNlpParsePseudoTable());
+                break;
+            case no_xmlparse:
+            case no_newxmlparse:
+                used.removeActive(queryXmlParsePseudoTable());
+                break;
+            }
             cacheChildrenTablesUsed(used, 0, 1);
 #ifdef GATHER_HIDDEN_SELECTORS
             used.addHiddenTable(left, selSeq);
@@ -5201,15 +5218,6 @@ void CHqlExpressionWithTables::cacheTablesUsed()
                         used.cleanupProduction();
                         break;
                     }
-                case no_parse:
-                case no_newparse:
-                    {
-                        cacheTablesProcessChildScope(used);
-                        used.cleanupProduction();
-                        //Not strictly true - need to inherit from arg(0) if present.
-                        used.removeActive(queryMatchxxxPseudoFile());
-                        break;
-                    }
                 default:
                     {
                         ITypeInfo * thisType = queryType();
@@ -5242,6 +5250,11 @@ void CHqlExpressionWithTables::cacheTablesUsed()
             }
             switch (op)
             {
+            case no_xmltext:
+            case no_xmlunicode:
+            case no_xmlproject:
+                used.addActiveTable(queryXmlParsePseudoTable());
+                break;
             case no_matched:
             case no_matchtext:
             case no_matchunicode:
@@ -5249,8 +5262,16 @@ void CHqlExpressionWithTables::cacheTablesUsed()
             case no_matchposition:
             case no_matchrow:
             case no_matchutf8:
-                used.addActiveTable(queryMatchxxxPseudoFile());
+            case no_matchattr:
+                used.addActiveTable(queryNlpParsePseudoTable());
                 break;
+            case no_externalcall:
+                {
+                    IHqlExpression * def = queryExternalDefinition()->queryChild(0);
+                    if (def->hasAttribute(userMatchFunctionAtom))
+                        used.addActiveTable(queryNlpParsePseudoTable());
+                    break;
+                }
             }
             used.set(usedTables);
         }

+ 8 - 2
ecl/hql/hqlopt.cpp

@@ -1782,9 +1782,15 @@ IHqlExpression * CTreeOptimizer::moveProjectionOverSimple(IHqlExpression * trans
     {
         if (idx != 0)
         {
-            bool ok = false;
+            bool ok = true;
             IHqlExpression * cur = child->queryChild(idx);
-            IHqlExpression * collapsed = mapper->collapseFields(cur, grandchild, newProject, &ok);
+            IHqlExpression * collapsed;
+            //NB: Attributes are generally independent of the input dataset, so they shouldn't be reverse mapped,
+            //otherwise if an input-invariant expression is projected it can cause problems (jholt44.eclxml)
+            if (cur->isAttribute())
+                collapsed = LINK(cur);
+            else
+                collapsed = mapper->collapseFields(cur, grandchild, newProject, &ok);
             if (!ok)
             {
                 ::Release(collapsed);

+ 7 - 8
ecl/hql/hqlpmap.cpp

@@ -315,28 +315,28 @@ void NewProjectMapper2::setUnknownMapping()
     mapping = queryUnknownAttribute();
 }
 
-void NewProjectMapper2::initMapping()
+bool NewProjectMapper2::ensureMapping()
 {
     if (targets.ordinality())
-        return;
+        return true;
 
     switch (mapping->getOperator())
     {
     case no_record:
         setRecord(mapping);
-        break;
+        return true;
     case no_newtransform:
         setTransform(mapping);
-        break;
+        return true;
     case no_transform:
         setTransform(mapping);
-        break;
+        return true;
     case no_alias_scope:
     case no_none:
     case no_externalcall:
     case no_outofline:
     case no_attr:
-        break;              // avoid internal error when values not provided for a record structure
+        return false;              // avoid internal error when values not provided for a record structure
     default:
         UNIMPLEMENTED_XY("mapping", getOpString(mapping->getOperator()));
         break;
@@ -351,8 +351,7 @@ void NewProjectMapper2::initSelf(IHqlExpression * dataset)
 
 bool NewProjectMapper2::isMappingKnown()
 {
-    initMapping();
-    return targets.ordinality() != 0;
+    return ensureMapping();
 }
 
 void NewProjectMapper2::setRecord(IHqlExpression * record, IHqlExpression * selector)

+ 1 - 2
ecl/hql/hqlpmap.hpp

@@ -51,8 +51,7 @@ public:
 private:
     void addMapping(IHqlExpression * field, IHqlExpression * expr);
 
-    inline void ensureMapping()                                 { if (targets.ordinality() == 0) initMapping(); }
-    void initMapping();
+    bool ensureMapping();
 
     void setRecord(IHqlExpression * record, IHqlExpression * selector);
     void setRecord(IHqlExpression * record);

+ 0 - 2
ecl/hql/hqlthql.cpp

@@ -681,8 +681,6 @@ void HqltHql::toECL(IHqlExpression *expr, StringBuffer &s, bool paren, bool inTy
             s.append('[');
             unsigned flags = expr->getInfoFlags();
             if (flags & HEFgraphDependent) s.append('G');
-            if (flags & HEFcontainsNlpText) s.append('N');
-            if (flags & HEFcontainsXmlText) s.append('X');
             if (flags & HEFcontainsSkip) s.append('S');
             if (flags & HEFcontainsCounter) s.append('C');
             if (flags & HEFtransformDependent) s.append('D');

+ 13 - 5
ecl/hql/hqlutil.cpp

@@ -56,7 +56,8 @@ static IHqlExpression * cacheLinkCountedAttr;
 static IHqlExpression * cacheReferenceAttr;
 static IHqlExpression * cacheStreamedAttr;
 static IHqlExpression * cacheUnadornedAttr;
-static IHqlExpression * matchxxxPseudoFile;
+static IHqlExpression * nlpParsePsuedoTable;
+static IHqlExpression * xmlParsePsuedoTable;
 static IHqlExpression * cachedQuotedNullExpr;
 static IHqlExpression * cachedGlobalSequenceNumber;
 static IHqlExpression * cachedLocalSequenceNumber;
@@ -84,7 +85,8 @@ MODULE_INIT(INIT_PRIORITY_STANDARD)
     cacheReferenceAttr = createAttribute(referenceAtom);
     cacheStreamedAttr = createAttribute(streamedAtom);
     cacheUnadornedAttr = createAttribute(_propUnadorned_Atom);
-    matchxxxPseudoFile = createDataset(no_pseudods, createRecord()->closeExpr(), createAttribute(matchxxxPseudoFileAtom));
+    nlpParsePsuedoTable = createDataset(no_pseudods, createRecord()->closeExpr(), createAttribute(_nlpParse_Atom));
+    xmlParsePsuedoTable = createDataset(no_pseudods, createRecord()->closeExpr(), createAttribute(_xmlParse_Atom));
     cachedQuotedNullExpr = createValue(no_nullptr, makeBoolType());
     cachedOmittedValueExpr = createValue(no_omitted, makeAnyType());
 
@@ -110,7 +112,8 @@ MODULE_EXIT()
     cacheReferenceAttr->Release();
     cacheStreamedAttr->Release();
     cacheUnadornedAttr->Release();
-    matchxxxPseudoFile->Release();
+    xmlParsePsuedoTable->Release();
+    nlpParsePsuedoTable->Release();
     cachedQuotedNullExpr->Release();
     cachedGlobalSequenceNumber->Release();
     cachedLocalSequenceNumber->Release();
@@ -230,9 +233,14 @@ extern HQL_API IHqlExpression * getReferenceAttr()
     return LINK(cacheReferenceAttr);
 }
 
-extern HQL_API IHqlExpression * queryMatchxxxPseudoFile()
+extern HQL_API IHqlExpression * queryNlpParsePseudoTable()
 {
-    return matchxxxPseudoFile;
+    return nlpParsePsuedoTable;
+}
+
+extern HQL_API IHqlExpression * queryXmlParsePseudoTable()
+{
+    return xmlParsePsuedoTable;
 }
 
 IHqlExpression * getGlobalSequenceNumber()      { return LINK(cachedGlobalSequenceNumber); }

+ 2 - 1
ecl/hql/hqlutil.hpp

@@ -585,7 +585,8 @@ extern HQL_API IHqlExpression * getFixedSizeAttr(unsigned size);
 extern HQL_API IHqlExpression * queryAlignedAttr();
 extern HQL_API IHqlExpression * queryLinkCountedAttr();
 extern HQL_API IHqlExpression * queryUnadornedAttr();
-extern HQL_API IHqlExpression * queryMatchxxxPseudoFile();
+extern HQL_API IHqlExpression * queryNlpParsePseudoTable();
+extern HQL_API IHqlExpression * queryXmlParsePseudoTable();
 extern HQL_API IHqlExpression * queryQuotedNullExpr();
 
 extern HQL_API IHqlExpression * getEmbeddedAttr();

+ 6 - 0
ecl/hqlcpp/hqlcse.cpp

@@ -116,6 +116,9 @@ bool canCreateTemporary(IHqlExpression * expr)
     case type_transform:
     case type_null:
     case type_void:
+    case type_rule:
+    case type_pattern:
+    case type_token:
         return false;
     default:
         return true;
@@ -1318,6 +1321,9 @@ bool TableInvariantTransformer::isInvariant(IHqlExpression * expr)
         break;
     default:
         if (!isContextDependent(expr))
+        //MORE: The following line is needed if the xml/parse flags are removed from the context, but it causes problems
+        //preventing counts from being hoisted as aliases.  That is really correct - but it makes code worse for some examples.
+        //if (!isContextDependent(expr) && expr->isIndependentOfScope())
         {
             if (!expr->isAction())// && !expr->isDataset() && !expr->isDatarow())
             {

+ 4 - 0
ecl/hqlcpp/hqlhtcpp.cpp

@@ -16184,6 +16184,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityXmlParse(BuildCtx & ctx, IHqlE
 
     OwnedHqlExpr helperName = createQuoted("parsed", makeBoolType());
     funcctx.associateExpr(xmlColumnProviderMarkerExpr, helperName);
+    bindTableCursor(funcctx, queryXmlParsePseudoTable(), queryXmlParsePseudoTable());
     xmlUsesContents = false;
     doTransform(funcctx, transform, selfCursor);
     buildReturnRecordSize(funcctx, selfCursor);
@@ -17834,6 +17835,9 @@ static void logECL(const LogMsgCategory & category, size32_t len, const char * e
 
 void HqlCppTranslator::traceExpression(const char * title, IHqlExpression * expr, unsigned level)
 {
+    if (!expr)
+        return;
+
     checkAbort();
 
     LOG(MCdebugInfo(200), unknownJob, "Tracing expressions: %s", title);

+ 2 - 2
ecl/hqlcpp/hqlinline.cpp

@@ -1362,7 +1362,7 @@ bool EvalContext::evaluateInParent(BuildCtx & ctx, IHqlExpression * expr, bool h
     if (isContextDependent(expr))
         return false;
 
-    if (!containsActiveDataset(expr))
+    if (isIndependentOfScope(expr))
         return true;//isColocal();
 
     //If can evaluate in parent's start context then always worth doing there.
@@ -1589,7 +1589,7 @@ AliasKind ClassEvalContext::evaluateExpression(BuildCtx & ctx, IHqlExpression *
 
         if (!isContextDependentExceptGraph(value))
         {
-            if (!isContextDependent(value) && !containsActiveDataset(value))
+            if (!isContextDependent(value) && !containsActiveDataset(value) && value->isIndependentOfScope())
             {
                 createMemberAlias(onCreate, ctx, value, tgt);
                 return CreateTimeAlias;

+ 2 - 2
ecl/hqlcpp/hqlnlp.cpp

@@ -257,7 +257,7 @@ void NlpParseContext::buildValidators(HqlCppTranslator & translator, BuildCtx &
             }
             validctx.associateExpr(activeNlpMarkerExpr, activeNlpMarkerExpr);
             validctx.associateExpr(activeValidateMarkerExpr, activeValidateMarkerExpr);
-            translator.bindTableCursor(validctx, queryMatchxxxPseudoFile(), queryMatchxxxPseudoFile());
+            translator.bindTableCursor(validctx, queryNlpParsePseudoTable(), queryNlpParsePseudoTable());
             if (translator.queryOptions().spotCSE)
                 validateExpr.setown(spotScalarCSE(validateExpr));
             translator.buildReturn(validctx, validateExpr);
@@ -515,7 +515,7 @@ void HqlCppTranslator::doBuildParseTransform(BuildCtx & classctx, IHqlExpression
     ensureRowAllocated(funcctx, "crSelf");
     funcctx.addQuoted("const unsigned char * left = (const unsigned char *) _left;");
     funcctx.associateExpr(activeNlpMarkerExpr, activeNlpMarkerExpr);
-    bindTableCursor(funcctx, queryMatchxxxPseudoFile(), queryMatchxxxPseudoFile());
+    bindTableCursor(funcctx, queryNlpParsePseudoTable(), queryNlpParsePseudoTable());
 
     // Bind left to "left" and right to RIGHT
     IHqlExpression * dataset = expr->queryChild(0);

+ 1 - 1
ecl/hqlcpp/hqlresource.cpp

@@ -2127,7 +2127,7 @@ protected:
     void findSplitPoints(IHqlExpression * expr)
     {
         //containsNonActiveDataset() would be nice - but that isn't percolated outside assigns etc.
-        if (containsAnyDataset(expr) || containsMustHoist(expr))
+        if (containsAnyDataset(expr) || containsMustHoist(expr) || !expr->isIndependentOfScope())
         {
             if (!gathered)
             {

+ 2 - 2
ecl/hqlcpp/hqlttcpp.cpp

@@ -6959,7 +6959,7 @@ void ScalarGlobalTransformer::analyseExpr(IHqlExpression * expr)
             if (extra->createGlobal)
                 return;
             //Allow a global to be created inside a global marked from somewhere else.
-            if (containsAnyDataset(expr) || expr->isConstant() || isContextDependent(expr))
+            if (containsAnyDataset(expr) || expr->isConstant() || isContextDependent(expr) || !expr->isIndependentOfScope())
                 return;
         }
     }
@@ -7012,7 +7012,7 @@ void ScalarGlobalTransformer::doAnalyseExpr(IHqlExpression * expr)
 #ifndef NEW_SCALAR_CODE
 //  Commented line has problems with SELF used in HOLE definition, and explosion in thumphrey7 etc.
 //  if (okToHoist && isIndependentOfScope(expr) && !expr->isConstant() && !isContextDependent(expr) && expr->isPure())
-    if (okToHoist && !containsAnyDataset(expr) && !expr->isConstant() && !isContextDependent(expr) && expr->isPure())
+    if (okToHoist && !containsAnyDataset(expr) && !expr->isConstant() && !isContextDependent(expr) && expr->isPure() && expr->isIndependentOfScope())
     {
         ITypeInfo * type = expr->queryType();
         if (isTypeToHoist(type))