/*##############################################################################
Copyright (C) 2011 HPCC Systems.
All rights reserved. This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
############################################################################## */
#include "platform.h"
#include "jlib.hpp"
#include "jmisc.hpp"
#include "jstream.ipp"
#include "hql.hpp"
#include "hqlcse.ipp"
#include "hqlutil.hpp"
#include "hqlcpputil.hpp"
#include "hqlthql.hpp"
#include "hqlcatom.hpp"
#include "hqlfold.hpp"
#include "hqlpmap.hpp"
#include "hqlopt.hpp"
#include "hqlcerrors.hpp"
#include "hqlttcpp.ipp"
#ifdef _DEBUG
//#define TRACE_CSE
#endif
//The following allows x != y and x == y to be commoned up. It works, but currently disabled
//because cse doesn't preserve short circuit of AND and OR, and some examples mean it will do more
//work because the alias will always be evaluated. (e.g., salt1.xhql)
//Really aliases need to be functional and executed on demand or something similar.
//#define OPTIMIZE_INVERSE
//---------------------------------------------------------------------------
//Returns whether the expression may be wrapped with a CSE.
//The only operator that is rejected is no_mapto.
inline bool canWrapWithCSE(IHqlExpression * expr)
{
    return expr->getOperator() != no_mapto;
}
//Returns whether a temporary can be created to hold the value of the expression.
//A fixed list of operators is rejected outright; any other expression qualifies
//provided it is neither an action nor a transform.
bool canCreateTemporary(IHqlExpression * expr)
{
    const node_operator op = expr->getOperator();
    switch (op)
    {
    //ranges
    case no_range: case no_rangefrom: case no_rangeto: case no_rangecommon:
    //constants and structural nodes
    case no_constant: case no_all: case no_mapto: case no_record:
    case no_attr: case no_attr_expr: case no_attr_link:
    case no_joined: case no_sizeof: case no_offsetof:
    //transforms and assignments
    case no_newtransform: case no_transform: case no_assign: case no_assignall:
    //selectors
    case no_left: case no_right: case no_self: case no_top: case no_activetable:
    case no_alias: case no_skip: case no_assert: case no_counter: case no_sortlist:
    //pattern matching
    case no_matched: case no_matchtext: case no_matchunicode: case no_matchposition:
    case no_matchlength: case no_matchattr: case no_matchrow: case no_matchutf8:
    //lists
    case no_recordlist: case no_transformlist: case no_rowvalue:
    //scoping and others
    case no_pipe: case no_colon: case no_globalscope: case no_subgraph:
    case no_forcelocal: case no_forcenolocal: case no_allnodes: case no_thisnode:
    case no_libraryscopeinstance: case no_loopbody:
        return false;
    }

    if (expr->isAction())
        return false;
    return !expr->isTransform();
}
//---------------------------------------------------------------------------
/*
Cse spotting...
* Don't remove named symbols from items that aren't transformed.
* Common items up regardless of the named symbol used to reference it.
*/
//Initialise the per-expression information: no references counted, no flags set,
//and no linked inverse or annotated expression.
CseSpotterInfo::CseSpotterInfo(IHqlExpression * expr) : NewTransformInfo(expr)
{
    //linked information
    inverse = NULL;
    annotatedExpr = NULL;

    //reference counts
    numRefs = 0;
    numAssociatedRefs = 0;

    //flags
    canAlias = false;
    alreadyAliased = false;
    treatAsAliased = false;
    dontTransform = false;
    dontTransformSelector = false;
}
//worth aliasing if referenced more than once, and used more than once in the expressions that are going to be evaluated now
//True if this node is referenced more than once, and not every reference was
//an associated reference.
bool CseSpotterInfo::worthAliasingOnOwn()
{
    if (numRefs <= 1)
        return false;
    return numRefs != numAssociatedRefs;
}
bool CseSpotterInfo::worthAliasing()
{
if (!inverse)
return worthAliasingOnOwn();
//no_not will always traverse the inverse (at least once), so don't sum the two counts - just use the non inverted count
if (original->getOperator() == no_not)
return worthAliasingOnOwn() || inverse->worthAliasingOnOwn();
if (inverse->original->getOperator() == no_not)
return worthAliasingOnOwn();
unsigned totalRefs = numRefs + inverse->numRefs;
unsigned totalAssociatedRefs = numAssociatedRefs + inverse->numAssociatedRefs;
if ((totalRefs > 1) && (totalRefs != totalAssociatedRefs))
return true;
return false;
}
//Do we create an alias for this node, or the other one?
bool CseSpotterInfo::useInverseForAlias()
{
if (!inverse)
return false;
if (numRefs == numAssociatedRefs)
return true;
node_operator op = original->getOperator();
switch (op)
{
case no_not:
case no_ne:
case no_notin:
case no_notbetween:
return inverse->worthAliasingOnOwn();
}
node_operator invOp = inverse->original->getOperator();
switch (invOp)
{
case no_not: return false; //No otherwise we'll expand recursively!
case no_ne:
case no_notin:
case no_notbetween:
return !worthAliasingOnOwn();
}
return op > invOp;
}
static HqlTransformerInfo cseSpotterInfo("CseSpotter");

//Create a spotter in its default state: aliasing is allowed, nothing has been
//spotted or created yet, and local aliases are not generated.
CseSpotter::CseSpotter()
: NewHqlTransformer(cseSpotterInfo)
{
    invariantSelector = NULL;

    canAlias = true;
    isAssociated = false;

    spottedCandidate = false;
    createdAlias = false;
    createLocalAliases = false;
}
//Analyse an expression with the isAssociated flag set, so that analyseExpr()
//also increments numAssociatedRefs for each node it visits.
void CseSpotter::analyseAssociated(IHqlExpression * expr, unsigned pass)
{
    isAssociated = true;        // seen by analyseExpr() while the analysis runs
    analyse(expr, pass);
    isAssociated = false;
}
//Gather the reference counts and flags used to decide which expressions are worth aliasing.
//
//For each node this records the first annotated form seen, counts references (and associated
//references when called via analyseAssociated()), marks existing aliases and their values as
//already aliased, and notes whether the node may be aliased in the current context.
//Children are only walked the first time a node is met, unless the node cannot be held in
//a temporary (with the exception of no_mapto).
void CseSpotter::analyseExpr(IHqlExpression * expr)
{
    CseSpotterInfo * extra = queryBodyExtra(expr);
    if (!extra->annotatedExpr && expr->isAnnotation())
        extra->annotatedExpr = expr;

    if (isAssociated)
        extra->numAssociatedRefs++;

    node_operator op = expr->getOperator();
#ifdef OPTIMIZE_INVERSE
    if (getInverseOp(op) != no_none)
    {
        //Link the information for an expression and its inverse so the counts can be combined
        OwnedHqlExpr inverse = getInverse(expr);
        CseSpotterInfo * inverseExtra = queryBodyExtra(inverse);
        extra->inverse = inverseExtra;
        inverseExtra->inverse = extra;
    }
#endif

    if (op == no_alias)
    {
        queryBodyExtra(expr->queryChild(0))->alreadyAliased = true;
        extra->alreadyAliased = true;
    }

    switch (op)
    {
    case no_assign:
    case no_transform:
    case no_newtransform:
    case no_range:
    case no_rangefrom:
        if (expr->isConstant())
            return;
        break;
    case no_constant:
        return;
    }

    if (extra->numRefs++ != 0)
    {
        //Already visited at least once...
        if (op == no_alias)
            return;
        if (!spottedCandidate && extra->worthAliasing())
            spottedCandidate = true;
        if (canCreateTemporary(expr))
            return;

        //Ugly! This is here as a temporary hack to stop branches of maps being commoned up and always
        //evaluated. The alias spotting and generation really needs to take conditionality into account....
        if (op == no_mapto)
            return;
    }

    if (!containsPotentialCSE(expr))
        return;

    if (canAlias && !expr->isDataset())
        extra->canAlias = true;

    //canAlias may be modified while walking the children - restored before returning
    bool savedCanAlias = canAlias;
    if (expr->isDataset() && (op != no_select))// && (op != no_if))
    {
        //There is little point looking for CSEs within dataset expressions, because only a very small
        //minority which would correctly cse, and it can cause lots of problems - e.g., join conditions.
        unsigned first = getFirstActivityArgument(expr);
        unsigned num = getNumActivityArguments(expr);
        ForEachChild(i, expr)
        {
            IHqlExpression * cur = expr->queryChild(i);
            //Only the activity arguments inherit the current setting; all other children cannot be aliased
            if (i >= first && i < first+num)
                canAlias = savedCanAlias;
            else
                canAlias = false;
            analyseExpr(cur);
        }
    }
    else
        PARENT::analyseExpr(expr);
    canAlias = savedCanAlias;
}
//Create an alias for expr via the global ::createAliasOwn(), and record in extra that it
//has now been aliased. A local attribute is attached if createLocalAliases is set.
IHqlExpression * CseSpotter::createAliasOwn(IHqlExpression * expr, CseSpotterInfo * extra)
{
#ifdef TRACE_CSE
    StringBuffer s;
    DBGLOG("Create alias for %s (%d refs)", getExprIdentifier(s, expr).str(), extra->numRefs);
#endif

    extra->alreadyAliased = true;
    if (!createLocalAliases)
        return ::createAliasOwn(expr, NULL);
    return ::createAliasOwn(expr, createLocalAttribute());
}
//Transform an expression, replacing it with an alias if the analysis decided it was worthwhile.
//
//Pattern-matching operators and no_translated are returned unchanged. Otherwise the expression
//is transformed by the base class, and if the node can be aliased, is worth aliasing, and
//passes checkPotentialCSE(), the result is either
//  - the inverse of the transformed inverse (when the alias belongs on the inverse), or
//  - a new alias of the body of the transformed expression.
IHqlExpression * CseSpotter::createTransformed(IHqlExpression * expr)
{
    node_operator op = expr->getOperator();
    switch (op)
    {
    case no_matched:
    case no_matchtext:
    case no_matchunicode:
    case no_matchposition:
    case no_matchlength:
    case no_matchrow:
    case no_matchutf8:
        //These actually go wrong if we remove the named symbols, so traverse under no circumstances.
        //others can be traversed to patch up references to datasets that have changed.
    case no_translated:
        return LINK(expr);
    }

    OwnedHqlExpr transformed = PARENT::createTransformed(expr);
    CseSpotterInfo * splitter = queryBodyExtra(expr);

    //MORE: Possibly add a unique number to the alias when this starts worrying about child scopes.
    if (splitter->canAlias && splitter->worthAliasing() && checkPotentialCSE(expr, splitter))
    {
        if (splitter->useInverseForAlias())
        {
            OwnedHqlExpr inverse = getInverse(expr);
            OwnedHqlExpr transformedInverse = transform(inverse);
            return getInverse(transformedInverse);
        }

        createdAlias = true;

        //Use the transformed body to ensure that any cses only create a single instance.
        //The annotations are deliberately not reattached to the alias - the alias is returned as is.
        LinkedHqlExpr aliasValue = transformed->queryBody();
        return createAliasOwn(aliasValue.getClear(), splitter);
    }

    return transformed.getClear();
}
//Create the CseSpotterInfo that is associated with each expression by this transformer.
ANewTransformInfo * CseSpotter::createTransformInfo(IHqlExpression * expr)
{
    return CREATE_NEWTRANSFORMINFO(CseSpotterInfo, expr);
}
//Used by analyseExpr() to decide whether it is worth continuing the analysis for an expression.
//Returns false for a fixed list of operators (structural nodes, selectors, pattern matching
//operators, aggregates, compound source activities...), true for assignments, transforms,
//ranges and no_skip, and for everything else returns true only if the expression
//is non-constant and does not have a void type.
bool CseSpotter::containsPotentialCSE(IHqlExpression * expr)
{
switch (expr->getOperator())
{
//Operators that are never searched
case no_record:
case no_attr:
case no_attr_expr:
case no_attr_link:
case no_joined:
case no_sizeof:
case no_offsetof:
case no_field:
case no_evaluate: // MORE: This is an example of introducing a new scope...
case no_translated: // Causes recursion otherwise....
case no_left:
case no_right:
case no_top:
case no_self:
case no_selfref:
case no_activetable:
case no_filepos:
case no_file_logicalname:
case no_matched:
case no_matchtext:
case no_matchunicode:
case no_matchposition:
case no_matchrow:
case no_matchlength:
case no_matchutf8:
case no_catch:
case no_projectrow:
// case no_evalonce:
return false;
case no_select:
return false; //isNewSelector(expr);
case NO_AGGREGATE:
//There may possibly be cses, but we would need to do lots of scoping analysis to work out whether they were
//really common.
return false;
//Operators that are always searched - no constant or type check is applied here
case no_assign:
case no_assignall:
case no_transform:
case no_newtransform:
case no_range:
case no_rangefrom:
case no_rangeto:
case no_rangecommon:
case no_skip:
return true;
//Compound source activities are never searched
case no_compound_diskread:
case no_compound_indexread:
case no_compound_disknormalize:
case no_compound_diskaggregate:
case no_compound_diskcount:
case no_compound_diskgroupaggregate:
case no_compound_indexnormalize:
case no_compound_indexaggregate:
case no_compound_indexcount:
case no_compound_indexgroupaggregate:
case no_compound_childread:
case no_compound_childnormalize:
case no_compound_childaggregate:
case no_compound_childcount:
case no_compound_childgroupaggregate:
case no_compound_selectnew:
case no_compound_inline:
return false;
#if 0
//Strictly speaking, we shouldn't common up conditional expressions, but it generally provides such a reduction in code
//that it will stay enabled until I come up with a better scheme.
case no_if:
case no_rejected:
case no_which:
case no_case:
case no_map:
return false;
#endif
}
//Default: void typed expressions and constants are not searched
ITypeInfo * type = expr->queryType();
if (type && type->getTypeCode() == type_void)
return false;
return !expr->isConstant();// || expr->isDataset() || expr->isDatarow();
}
//Final check on whether an alias should be created for an expression that is worth aliasing.
//
//Returns false if the expression has already been aliased, is not pure, cannot be held in a
//temporary, or references the invariant selector. Otherwise the result depends on the operator:
//some are never aliased, some always are, and some depend on the types or children involved.
//Operators with no special handling are aliased if they have a type and at least one child.
//
//Side effect: for no_charlen of an aliased value or a no_select that is not a varstring/varunicode,
//extra->treatAsAliased is set.
bool CseSpotter::checkPotentialCSE(IHqlExpression * expr, CseSpotterInfo * extra)
{
    if (extra->alreadyAliased)
        return false;

    if (!expr->isPure() || !canCreateTemporary(expr))
        return false;

    if (invariantSelector && exprReferencesDataset(expr, invariantSelector))
        return false;

    switch (expr->getOperator())
    {
    case no_eq:
    case no_ne:
    case no_gt:
    case no_ge:
    case no_lt:
    case no_le:
        {
            //Don't combine integer comparisons into a CSE - not worth it...
            ITypeInfo * type = expr->queryChild(0)->queryType();
            switch (type->getTypeCode())
            {
            case type_boolean:
            case type_int:
                return false;
            }
            return true;
        }
    case no_not:
        {
            //No point aliasing the negation of something that is already aliased
            IHqlExpression * child = expr->queryChild(0);
            if (queryBodyExtra(child)->isAliased())
                return false;
            break;
        }
    case no_charlen:
        {
            IHqlExpression * child = expr->queryChild(0);
            if (queryBodyExtra(child)->isAliased() || child->getOperator() == no_select)
            {
                type_t tc = child->queryType()->getTypeCode();
                switch (tc)
                {
                case type_varstring:
                case type_varunicode:
                    return true;
                }
                //prevent (trivial-cast)length(x) from being serialized etc.
                extra->treatAsAliased = true;
                return false;
            }
            break;
        }
    case no_field:
        throwUnexpected();
    case no_select:
        return false; //expr->hasProperty(newAtom);
    case no_list:
    case no_datasetlist:
    case no_getresult:      // these are commoned up in the code generator, so don't do it twice.
    case no_getgraphresult:
    case no_getgraphloopresult:
    case no_translated:     // Causes recursion otherwise....
    case no_random:
        return false;
    case no_call:
    case no_externalcall:
    case no_libraryinput:
    case no_counter:
        return true;
    case no_substring:
        {
            //Only worth aliasing if the substring cannot be generated inline
            SubStringHelper helper(expr);
            return !helper.canGenerateInline();
        }
    case no_cast:
    case no_implicitcast:
        {
            ITypeInfo * exprType = expr->queryType();
            if (exprType->getTypeCode() == type_set)
                return false;
            IHqlExpression * uncast = expr->queryChild(0);
            if (uncast->queryValue())
                return false;

            //Ignore integral casts of items that have already been aliased
            if (queryBodyExtra(uncast)->isAliased())
            {
                if (exprType->isInteger() && uncast->queryType()->isInteger())
                {
                    if (extra->numRefs < 5)
                        return false;
                }
            }
            break;
        }

    //Following are all source datasets - no point in commoning them up
    //although probably exceptions e.g., table(,pipe)
    case no_none:
    case no_null:
    case no_anon:
    case no_pseudods:
    case no_all:
//  case no_table:      - normally work commoning up
    case no_temptable:
    case no_inlinetable:
    case no_xmlproject:
    case no_datasetfromrow:
    case no_preservemeta:
    case no_dataset_alias:
    case no_workunit_dataset:
    case no_left:
    case no_right:
    case no_top:
    case no_self:
    case no_selfref:
    case no_keyindex:
    case no_newkeyindex:
    case no_fail:
    case no_activetable:
    case no_soapcall:
    case no_newsoapcall:
    case no_id2blob:
    case no_cppbody:
    case no_rows:
        return false;
    }

    if (!expr->queryType())
        return false;

    return (expr->numChildren() > 0);
}
//Forwards directly to the base class implementation.
IHqlExpression * CseSpotter::transform(IHqlExpression * expr)
{
    IHqlExpression * result = PARENT::transform(expr);
    return result;
}
//Return the previously transformed version of an expression, or NULL if there is none.
//Expressions marked by stopTransformation() map to themselves. If the expression has not been
//transformed, the result for its body is used instead - unless the body mapped to itself.
IHqlExpression * CseSpotter::queryAlreadyTransformed(IHqlExpression * expr)
{
    if (queryBodyExtra(expr)->dontTransform)
        return expr;

    IHqlExpression * match = PARENT::queryAlreadyTransformed(expr);
    if (match)
        return match;

    IHqlExpression * body = expr->queryBody();
    if (body == expr)
        return NULL;

    IHqlExpression * bodyMatch = PARENT::queryAlreadyTransformed(body);
    if (bodyMatch == body)
        return NULL;
    return bodyMatch;
}
//Selectors marked by stopTransformation() map to themselves, otherwise defer to the base class.
IHqlExpression * CseSpotter::queryAlreadyTransformedSelector(IHqlExpression * expr)
{
    if (queryBodyExtra(expr)->dontTransformSelector)
        return expr;

    return PARENT::queryAlreadyTransformedSelector(expr);
}
//Mark an expression so that it (and its normalized selector) will be returned unchanged
//by queryAlreadyTransformed() and queryAlreadyTransformedSelector().
void CseSpotter::stopTransformation(IHqlExpression * expr)
{
    queryBodyExtra(expr)->dontTransform = true;

    IHqlExpression * selector = expr->queryNormalizedSelector();
    queryBodyExtra(selector)->dontTransformSelector = true;
}
//---------------------------------------------------------------------------
static HqlTransformerInfo conjunctionTransformerInfo("ConjunctionTransformer");

//No state of its own - only the transformer information is passed to the base class.
ConjunctionTransformer::ConjunctionTransformer()
: NewHqlTransformer(conjunctionTransformerInfo)
{
}
//Rebuild chains of no_and/no_or whose left operand has the same operator: the left operand is
//unwound into a list, each item and the right operand are transformed, and the list is
//recombined with createLeftBinaryList(). Everything else is transformed by the base class.
IHqlExpression * ConjunctionTransformer::createTransformed(IHqlExpression * expr)
{
    const node_operator op = expr->getOperator();
    switch (op)
    {
    case no_matched:
    case no_matchtext:
    case no_matchunicode:
    case no_matchlength:
    case no_matchposition:
    case no_matchrow:
    case no_matchutf8:
    //not so sure why the following causes problems - because the tables get changed I think.
    case no_filepos:
    case no_file_logicalname:
    case no_sizeof:
    case no_offsetof:
        return LINK(expr);
    }

    OwnedHqlExpr rebuilt;
    if ((op == no_and) || (op == no_or))
    {
        IHqlExpression * lhs = expr->queryChild(0);
        if (lhs->getOperator() == op)
        {
            HqlExprArray operands, newOperands;
            lhs->unwindList(operands, op);
            ForEachItemIn(i, operands)
                newOperands.append(*transform(&operands.item(i)));
            newOperands.append(*transform(expr->queryChild(1)));
            rebuilt.setown(createLeftBinaryList(op, newOperands));
        }
    }

    //Anything that was not rebuilt above goes through the default transformation
    if (!rebuilt)
        rebuilt.setown(NewHqlTransformer::createTransformed(expr));
    return rebuilt.getClear();
}
//---------------------------------------------------------------------------
#ifdef NEW_CSE_PROCESSING
//Returns false for the operators where an alias scope cannot be inserted
//(ranges, no_mapto, and the various list operators), true for everything else.
inline bool canInsertCodeAlias(IHqlExpression * expr)
{
    const node_operator op = expr->getOperator();
    switch (op)
    {
    case no_range: case no_rangefrom: case no_rangeto: case no_rangecommon:
    case no_mapto:
    case no_recordlist: case no_transformlist: case no_rowvalue: case no_sortlist:
        return false;
    }
    return true;
}
static HqlTransformerInfo cseScopeTransformerInfo("CseScopeTransformer");

//Start with no active parent, no sequence numbers allocated, and outside any condition.
CseScopeTransformer::CseScopeTransformer()
: NewHqlTransformer(cseScopeTransformerInfo)
{
    seq = 0;
    conditionDepth = 0;
    activeParent = NULL;
}
//Walk the expression tree recording, for each node that contains a non-global alias:
// - the first parent it was reached from (and any additional parents if it is shared),
// - a sequence number allocated in visit order,
// - whether it was first reached outside any conditional branch.
//Aliases are appended to allCSEs once their children have been processed.
void CseScopeTransformer::analyseExpr(IHqlExpression * expr)
{
expr = expr->queryBody();
if (!containsNonGlobalAlias(expr))
return;
node_operator op = expr->getOperator();
CseScopeInfo * splitter = queryExtra(expr);
//A non-zero seq means this node has already been visited => it is shared between parents
if (splitter->seq)
{
splitter->hasSharedParent = true;
splitter->addParent(activeParent);
return;
}
splitter->firstParent = activeParent;
splitter->seq = ++seq;
splitter->isUnconditional = (conditionDepth == 0);
{
//This node is the active parent while its children are analysed
IHqlExpression * savedParent = activeParent;
activeParent = expr;
switch (op)
{
case no_if:
case no_or:
case no_and:
case no_case:
{
//The first child is analysed at the current condition depth,
//the remaining children are analysed at an increased condition depth.
analyseExpr(expr->queryChild(0));
conditionDepth++;
ForEachChildFrom(i, expr, 1)
analyseExpr(expr->queryChild(i));
conditionDepth--;
break;
}
default:
NewHqlTransformer::analyseExpr(expr);
break;
}
activeParent = savedParent;
}
//Add here so the cse are in the correct order to cope with dependencies...
if (op == no_alias)
{
assertex(!expr->hasProperty(globalAtom));
allCSEs.append(*LINK(splitter));
}
}
//For each alias gathered by the analysis, work out where it should be defined and add it to
//the aliasesToDefine list of that location. Unconditional aliases with no location are
//attached to root. Returns true if any alias was attached to a node other than itself.
bool CseScopeTransformer::attachCSEs(IHqlExpression * root)
{
    bool anyAttached = false;
    ForEachItemIn(idx, allCSEs)
    {
        CseScopeInfo & cur = allCSEs.item(idx);
        IHqlExpression * target = findAliasLocation(&cur);
        if (!target && cur.isUnconditional)
            target = root;

        //Nothing to do if there is no location, or the alias would be attached to itself
        if (!target || (target == cur.original))
            continue;

        queryExtra(target)->aliasesToDefine.append(*LINK(cur.original));
        anyAttached = true;
    }
    return anyAttached;
}
//Transform an expression, and if any aliases were attached to it by attachCSEs() wrap the
//result in a no_alias_scope whose first argument is the transformed expression and whose
//remaining arguments are the transformed aliases.
IHqlExpression * CseScopeTransformer::createTransformed(IHqlExpression * expr)
{
//Can't short-circuit transformation if (!containsAlias(expr)) because it means references to transformed datasets won't get patched up
//Annotations: transform the body and then re-apply the annotation to the result
IHqlExpression * body = expr->queryBody(true);
if (body != expr)
{
OwnedHqlExpr ret = transform(body);
return expr->cloneAnnotation(ret);
}
//slight difference from before...
IHqlExpression * transformed = NewHqlTransformer::createTransformed(expr);
CseScopeInfo * splitter = queryExtra(expr);
if (splitter->aliasesToDefine.ordinality())
{
HqlExprArray args;
//The pointer is appended to args without an extra LINK
args.append(*transformed);
ForEachItemIn(idx, splitter->aliasesToDefine)
{
IHqlExpression * value = &splitter->aliasesToDefine.item(idx);
args.append(*transform(value));
}
//Create the scope node with the same kind (dataset/row/value) as the original expression
if (expr->isDataset())
transformed = createDataset(no_alias_scope, args);
else if (expr->isDatarow())
transformed = createRow(no_alias_scope, args);
else
transformed = createValue(no_alias_scope, transformed->getType(), args);
}
return transformed;
}
//Create the CseScopeInfo that is associated with each expression by this transformer.
ANewTransformInfo * CseScopeTransformer::createTransformInfo(IHqlExpression * expr)
{
    return CREATE_NEWTRANSFORMINFO(CseScopeInfo, expr);
}
//First find the highest shared parent node (or this if no parents are shared)
//Calculate (and cache in extra->commonLocation) the node that is common to all the paths
//used to reach this expression. The result is extra itself if it has a single unshared
//route from its first parent, and may be NULL if no common node exists.
//Side effects: may set extra->hasSharedParent and extra->isUnconditional.
CseScopeInfo * CseScopeTransformer::calcCommonLocation(CseScopeInfo * extra)
{
//Return the cached result if this has already been calculated
if (extra->calcedCommonLocation)
return extra->commonLocation;
CseScopeInfo * commonLocation = extra;
if (extra->firstParent)
{
CseScopeInfo * firstParentExtra = queryExtra(extra->firstParent);
CseScopeInfo * commonParent = calcCommonLocation(firstParentExtra);
//Single parent, which is either not shared or is itself an alias => this node is the location
if ((extra->parents.ordinality() == 0) && (!firstParentExtra->hasSharedParent || extra->firstParent->getOperator() == no_alias))
// if ((extra->parents.ordinality() == 0) && !firstParentExtra->hasSharedParent)
{
//assertex(commonParent == firstParentExtra);
//commonParent = extra;
}
else
{
//Otherwise combine the common location of the first parent with those of all the other parents
extra->hasSharedParent = true;
commonLocation = commonParent;
ForEachItemIn(i, extra->parents)
{
CseScopeInfo * nextExtra = calcCommonLocation(queryExtra(extra->parents.item(i)));
//If any of the parents' common locations is unconditional then this node is treated as unconditional
if (nextExtra->isUnconditional)
extra->isUnconditional = true;
commonLocation = findCommonPath(commonLocation, nextExtra);
//No common location, and already known to be unconditional => nothing more to discover
if (!commonLocation && extra->isUnconditional)
break;
}
}
}
else
{
//No first parent, but marked as shared => there is no common location
if (extra->hasSharedParent)
commonLocation = NULL;
}
extra->calcedCommonLocation = true;
extra->commonLocation = commonLocation;
return commonLocation;
}
//Starting from the common location for the alias, walk up through the parents until a node
//is found where an alias scope can be inserted. Returns NULL if there is no such node.
IHqlExpression * CseScopeTransformer::findAliasLocation(CseScopeInfo * extra)
{
    CseScopeInfo * candidate = calcCommonLocation(extra);
    while (candidate)
    {
        IHqlExpression * location = candidate->original;
        if (canInsertCodeAlias(location))
            return location;
        candidate = selectParent(candidate);
    }
    return NULL;
}
//Return the next node up from info: its common location if it is shared, otherwise the
//information for its first parent (or NULL if it has no parent).
CseScopeInfo * CseScopeTransformer::selectParent(CseScopeInfo * info)
{
    if (info->hasSharedParent)
        return info->commonLocation;

    if (info->firstParent)
        return queryExtra(info->firstParent);
    return NULL;
}
//Find the node that is common to the parent chains of left and right, or NULL if none.
//Whichever side has the larger sequence number is moved up to its parent at each step
//(the right side when they are not larger on the left).
CseScopeInfo * CseScopeTransformer::findCommonPath(CseScopeInfo * left, CseScopeInfo * right)
{
    while (left && right)
    {
        if (left == right)
            return left;

        if (left->seq > right->seq)
            left = selectParent(left);
        else
            right = selectParent(right);
    }
    return NULL;
}
#else
//Record the alias expression together with a copy of the path that was used to reach it,
//and then trim the path so that it ends at a valid location.
CSEentry::CSEentry(IHqlExpression * _value, PathArray & _path)
{
    value.set(_value);

    //Copy the path, reserving the space up front
    const unsigned pathDepth = _path.ordinality();
    path.ensure(pathDepth);
    ForEachItemIn(idx, _path)
        path.append(_path.item(idx));

    ensurePathValid();
}
//It is not valid to insert a no_code_alias at certain points....
//Remove entries from the end of the path until the last entry is a valid location
//(or the path is empty).
void CSEentry::ensurePathValid()
{
    while (path.ordinality())
    {
        bool validLocation = true;
        switch (path.tos().getOperator())
        {
        case no_range: case no_rangefrom: case no_rangeto: case no_rangecommon:
        case no_mapto:
        case no_recordlist: case no_transformlist: case no_rowvalue: case no_sortlist:
            validLocation = false;
            break;
        }

        if (validLocation)
            return;
        path.pop();
    }
}
//Truncate the path for this entry to the leading section it has in common with otherPath,
//backing up further if the new end cannot be wrapped with a CSE. If the path changed then
//the entries this one depends on are restricted to the new path as well.
void CSEentry::findCommonPath(PathArray & otherPath)
{
    const unsigned originalDepth = path.ordinality();
    unsigned limit = originalDepth;
    if (otherPath.ordinality() < limit)
        limit = otherPath.ordinality();

    //Count how many leading entries are the same expression in both paths
    unsigned common = 0;
    while ((common < limit) && (&path.item(common) == &otherPath.item(common)))
        common++;

    //Ensure the new location is valid for receiving the CSE
    while ((common != 0) && !canWrapWithCSE(&path.item(common-1)))
        common--;

    path.trunc(common);
    if (path.ordinality() != originalDepth)
    {
        ForEachItemIn(idx2, dependsOn)
            dependsOn.item(idx2).findCommonPath(path);
    }

    ensurePathValid();
}
static HqlTransformerInfo cseScopeTransformerInfo("CseScopeTransformer");

//Nothing to initialise beyond passing the transformer information to the base class.
CseScopeTransformer::CseScopeTransformer()
: NewHqlTransformer(cseScopeTransformerInfo)
{
}
//Walk the expression tree maintaining the current path of expressions. The first time an
//alias is met a CSEentry is created for it holding a copy of the current path; on later
//visits that entry's path is restricted to what it has in common with the current path.
//Each alias is also recorded as a dependency of the enclosing alias being processed (if any).
void CseScopeTransformer::analyseExpr(IHqlExpression * expr)
{
expr = expr->queryBody();
if (!containsNonGlobalAlias(expr))
return;
//cse is only set when this call creates a new entry for an alias
CSEentry * cse = NULL;
node_operator op = expr->getOperator();
if (op == no_alias)
{
assertex(!expr->hasProperty(globalAtom));
CseScopeInfo * splitter = queryExtra(expr);
//PrintLog("splitter: %s", expr->toString(StringBuffer()).str());
if (splitter->cseUse)
{
//Find the common path, and map the alias.
//NOTE: this local deliberately hides the outer cse, which stays NULL on this path
CSEentry * cse = splitter->cseUse;
cse->findCommonPath(path);
if (activeCSE.ordinality())
activeCSE.tos().dependsOn.append(*LINK(cse));
return;
}
//First time this alias has been seen - create an entry, and make it the active alias
cse = new CSEentry(expr, path);
splitter->cseUse.setown(cse);
if (activeCSE.ordinality())
activeCSE.tos().dependsOn.append(*LINK(cse));
activeCSE.append(*LINK(cse));
}
#if 0
if ((op == no_transform) || (op == no_newtransform))
{
//For a transform add each assignment as a path point - so the aliases for assignments don't end up
//before aliases for skip attributes.
path.append(*expr);
ForEachChild(i, expr)
{
IHqlExpression * cur = expr->queryChild(i);
analyseExpr(cur);
path.append(*cur);
}
ForEachChild(i2, expr)
path.pop();
path.pop();
}
else
#endif
{
//This expression is on the path while its children are analysed
path.append(*expr);
NewHqlTransformer::analyseExpr(expr);
path.pop();
}
//Add here so the cse are in the correct order to cope with dependencies...
if (cse)
{
allCSEs.append(*LINK(cse));
activeCSE.pop();
}
}
//Attach each alias to the expression at the end of its path, by adding it to the cseDefine
//list for that expression. Aliases with an empty path are not attached.
//Returns true if any alias was attached.
bool CseScopeTransformer::attachCSEs(IHqlExpression * /*root*/)
{
    bool anyAttached = false;
    ForEachItemIn(idx, allCSEs)
    {
        CSEentry & cur = allCSEs.item(idx);
        if (!cur.path.ordinality())
            continue;

        IHqlExpression & target = cur.path.tos();
        queryExtra(&target)->cseDefine.append(OLINK(cur));
        anyAttached = true;
    }
    return anyAttached;
}
//Transform an expression, and if any aliases were attached to it by attachCSEs() wrap the
//result in a no_alias_scope whose first argument is the transformed expression and whose
//remaining arguments are the transformed alias values.
IHqlExpression * CseScopeTransformer::createTransformed(IHqlExpression * expr)
{
//Can't short-circuit transformation if (!containsAlias(expr)) because it means references to transformed datasets won't get patched up
//Annotations: transform the body and then re-apply the annotation to the result
IHqlExpression * body = expr->queryBody(true);
if (body != expr)
{
OwnedHqlExpr ret = transform(body);
return expr->cloneAnnotation(ret);
}
//slight difference from before...
IHqlExpression * transformed = NewHqlTransformer::createTransformed(expr);
CseScopeInfo * splitter = queryExtra(expr);
if (splitter->cseDefine.ordinality())
{
HqlExprArray args;
//The pointer is appended to args without an extra LINK
args.append(*transformed);
ForEachItemIn(idx, splitter->cseDefine)
{
CSEentry & cur = splitter->cseDefine.item(idx);
args.append(*transform(cur.value));
}
//Create the scope node with the same kind (dataset/row/value) as the original expression
if (expr->isDataset())
transformed = createDataset(no_alias_scope, args);
else if (expr->isDatarow())
transformed = createRow(no_alias_scope, args);
else
transformed = createValue(no_alias_scope, transformed->getType(), args);
}
return transformed;
}
//Create the CseScopeInfo that is associated with each expression by this transformer.
ANewTransformInfo * CseScopeTransformer::createTransformInfo(IHqlExpression * expr)
{
    return CREATE_NEWTRANSFORMINFO(CseScopeInfo, expr);
}
#endif
//Spot common sub-expressions within a single expression and return a new (linked) expression.
//The processing has three stages:
//  1. CseSpotter creates aliases for the repeated sub-expressions (not transforming limit).
//  2. ConjunctionTransformer restructures the ANDs/ORs.
//  3. CseScopeTransformer works out where the aliases should be evaluated.
//Constants, and no_select expressions without the new attribute, are returned unchanged.
IHqlExpression * spotScalarCSE(IHqlExpression * expr, IHqlExpression * limit)
{
    if (expr->isConstant())
        return LINK(expr);
    if ((expr->getOperator() == no_select) && !expr->hasProperty(newAtom))
        return LINK(expr);

    OwnedHqlExpr result = LINK(expr); //removeNamedSymbols(expr);
    bool aliasesCreated = false;

    //First spot the aliases - so that restructuring the ands doesn't lose any existing aliases.
    {
        CseSpotter spotter;
        spotter.analyse(result, 0);
        if (spotter.foundCandidates())
        {
            if (limit)
                spotter.stopTransformation(limit);
            result.setown(spotter.transformRoot(result));
            aliasesCreated = spotter.createdNewAliases();
        }
    }

    //No aliases at all (new or pre-existing) => nothing further to do
    if (!containsAlias(result))
        return result.getClear();

    //Transform conjunctions so they are (a AND (b AND (c AND d))) not (((a AND b) AND c) AND d)
    //so that alias scope can be introduced in a better place.
    {
        ConjunctionTransformer conjunctions;
        result.setown(conjunctions.transformRoot(result));
    }

    //Scopes are only worked out if this call created new aliases
    if (!aliasesCreated)
        return result.getClear();

    //Now work out where in the tree the aliases should be evaluated.
    {
        CseScopeTransformer scoper;
        scoper.analyse(result, 0);
        if (scoper.attachCSEs(result))
            result.setown(scoper.transformRoot(result));
    }
    return result.getClear();
}
//Spot common sub-expressions within expr, also taking account of references from an
//(optional) associated expression. Both expressions are updated in place.
//
//expr              - the expression to process; replaced by the transformed expression.
//associated        - an optional related expression; may be empty, in which case it is left empty.
//limit             - if supplied, this expression is not transformed.
//invariantSelector - if supplied, expressions referencing this selector are not aliased.
//
//Nothing is modified if the analysis finds no candidates.
void spotScalarCSE(SharedHqlExpr & expr, SharedHqlExpr & associated, IHqlExpression * limit, IHqlExpression * invariantSelector)
{
    CseSpotter spotter;
    spotter.analyse(expr, 0);
    if (associated)
        spotter.analyseAssociated(associated, 0);
    if (!spotter.foundCandidates())
        return;

    if (limit)
        spotter.stopTransformation(limit);
    if (invariantSelector)
        spotter.setInvariantSelector(invariantSelector);

    expr.setown(spotter.transformRoot(expr));
    //associated is optional (see the check before the analysis above), so only transform it if supplied
    if (associated)
        associated.setown(spotter.transformRoot(associated));
}
//Spot common sub-expressions across a list of expressions, also taking account of references
//from a list of associated expressions. Both arrays are replaced with the transformed
//expressions; neither is modified if the analysis finds no candidates.
void spotScalarCSE(HqlExprArray & exprs, HqlExprArray & associated, IHqlExpression * limit, IHqlExpression * invariantSelector)
{
    CseSpotter spotter;

    //Gather the reference counts from all of the expressions first
    spotter.analyseArray(exprs, 0);
    ForEachItemIn(ia, associated)
        spotter.analyseAssociated(&associated.item(ia), 0);
    if (!spotter.foundCandidates())
        return;

    if (limit)
        spotter.stopTransformation(limit);
    if (invariantSelector)
        spotter.setInvariantSelector(invariantSelector);

    //Transform into temporary arrays, and then swap the results into place
    HqlExprArray transformedExprs;
    HqlExprArray transformedAssociated;
    spotter.transformRoot(exprs, transformedExprs);
    spotter.transformRoot(associated, transformedAssociated);
    replaceArray(exprs, transformedExprs);
    replaceArray(associated, transformedAssociated);
}
//---------------------------------------------------------------------------
//The TableInvariantTransformer is important for ensuring that getResultXXX code is executed in the code context, amongst other things
//It must ensure that any global aliases couldn't contain some other global aliases inside a child query, otherwise when the child query is
//evaluated the result won't be in the correct place.
//
//MORE: This could be improved to work out whether it is worth creating an alias (which will then be serialized...)
//e.g., don't alias i) <alias> +- offset, ii) extension of an alias's size, iii) substring of a fixed size string, iv) length(string)
//however it is pretty good as it stands.
//ideally it would need information about how many times the expression is likely to be evaluated (e.g., 1/many)
//so that could be taken into account (e.g, filenames which are 'string' + conditional)
//Can this (invariant) expression be hoisted?
//It must be pure, must not be a no_list or no_datasetlist, and must either be something that
//a temporary can be created for, or a non-global alias.
static bool canHoistInvariant(IHqlExpression * expr)
{
    if (!canCreateTemporary(expr))
    {
        //Non-global aliases are the only exception to the temporary rule
        const bool isNonGlobalAlias = (expr->getOperator() == no_alias) && !expr->hasProperty(globalAtom);
        if (!isNonGlobalAlias)
            return false;
    }

    if (!expr->isPure())
        return false;

    const node_operator op = expr->getOperator();
    if ((op == no_list) || (op == no_datasetlist))
        return false; // probably don't want to hoist these
    return true;
}
static HqlTransformerInfo tableInvariantTransformerInfo("TableInvariantTransformer");

//Aliasing is enabled by default.
TableInvariantTransformer::TableInvariantTransformer()
: NewHqlTransformer(tableInvariantTransformerInfo)
{
    canAlias = true;
}
//Returns whether the expression is considered invariant. The result is cached in the
//information associated with the expression body, so each expression is only evaluated once.
//Selectors, records, aggregates and similar are never invariant; constants and results are
//always invariant; by default an expression is invariant if it is not context dependent,
//is not an action, and all of its children are invariant.
bool TableInvariantTransformer::isInvariant(IHqlExpression * expr)
{
TableInvariantInfo * extra = queryBodyExtra(expr);
if (extra->cachedInvariant)
return extra->isInvariant;
bool invariant = false;
node_operator op = expr->getOperator();
switch (op)
{
//Never invariant - leave invariant as false
case no_record:
case no_null:
case no_activetable:
case no_activerow:
case no_left:
case no_right:
case no_self:
case no_top:
case no_selfref:
case no_filepos:
case no_file_logicalname:
case no_joined:
case no_offsetof:
case no_sizeof:
case NO_AGGREGATE:
break;
case no_preservemeta:
invariant = isInvariant(expr->queryChild(0));
break;
//Always invariant
case no_constant:
case no_workunit_dataset:
case no_getresult:
case no_getgraphresult:
invariant = true;
break;
case no_select:
{
//A non-dataset select is invariant if the row or new dataset it selects from is invariant
IHqlExpression * ds = expr->queryChild(0);
if ((expr->hasProperty(newAtom) || ds->isDatarow()) && !expr->isDataset())
invariant = isInvariant(ds);
break;
}
case no_newaggregate:
{
//Allow these on a very strict subset of the datasets - to ensure that no potential globals can be included in the dataset
if (!isInvariant(expr->queryChild(0)))
break;
switch (querySimpleAggregate(expr, false, true))
{
case no_existsgroup:
case no_countgroup:
invariant = true;
break;
}
break;
}
case no_selectnth:
//Only when the index is a constant or a counter, and the dataset is invariant
switch (expr->queryChild(1)->getOperator())
{
case no_constant:
case no_counter:
invariant = isInvariant(expr->queryChild(0));
break;
}
break;
default:
if (!isContextDependent(expr))
{
if (!expr->isAction())// && !expr->isDataset() && !expr->isDatarow())
{
//Invariant if all the children are invariant (trivially true if there are no children)
invariant = true;
ForEachChild(i, expr)
{
IHqlExpression * cur = expr->queryChild(i);
if (!isInvariant(cur))
{
invariant = false;
break;
}
}
}
}
break;
}
//Cache the result for subsequent calls
extra->cachedInvariant = true;
extra->isInvariant = invariant;
return invariant;
}
#if 0
//NOTE: This version is compiled out (#if 0) - the active implementation follows the #else.
//It marks an expression as needing an alias if isInvariant() is true, it is not an attribute
//or constant, and it can be hoisted; otherwise it continues the analysis into the children.
void TableInvariantTransformer::analyseExpr(IHqlExpression * expr)
{
expr = expr->queryBody();
if (alreadyVisited(expr))
return;
node_operator op = expr->getOperator();
switch (op)
{
case no_record:
case no_constant:
return;
}
if (isInvariant(expr) && !expr->isAttribute() && !expr->isConstant() && canHoistInvariant(expr))
{
TableInvariantInfo * extra = queryBodyExtra(expr);
//Existing aliases are only marked if they are not already global
if (op == no_alias)
{
if (!expr->hasProperty(globalAtom))
extra->createAlias = true;
}
else
extra->createAlias = true;
//The children of a hoisted expression are not analysed
return;
}
if (op == no_attr_expr)
analyseChildren(expr);
else
NewHqlTransformer::analyseExpr(expr);
}
#else
//Walk the expression tree marking (via createAlias) the expressions that should be hoisted.
//An expression is marked if it is not context dependent, is not an attribute, can be hoisted,
//and is either
// - a non-constant trivial inline dataset (other than no_null), or
// - a non-constant scalar that doesn't contain an active dataset.
//Existing non-global aliases that don't contain an active dataset are also marked.
//The children of a marked expression are not analysed.
void TableInvariantTransformer::analyseExpr(IHqlExpression * expr)
{
expr = expr->queryBody();
TableInvariantInfo * extra = queryBodyExtra(expr);
if (alreadyVisited(expr))
return;
//More - these need to be handled properly...
node_operator op = expr->getOperator();
switch (op)
{
case no_record:
case no_constant:
return;
}
//We are trying to ensure that any expressions that don't access fields that are dependent on the activeDatasets/context are only
//evaluated once => check for active dataset rather than any dataset
bool candidate = false;
if (!isContextDependent(expr) && !expr->isAttribute())
{
if (isInlineTrivialDataset(expr) && !expr->isConstant())
{
candidate = (op != no_null);
}
else
{
if (!containsActiveDataset(expr))
{
//MORE: We should be able to hoist constant datasets (e.g., temptables), but it causes problems
//e.g., stops items it contains from being aliased. So
if (!expr->isAction() && !expr->isDataset() && !expr->isDatarow())
{
switch (op)
{
case no_alias:
//Existing aliases bypass the canHoistInvariant() check below, and their children are not analysed
if (!expr->hasProperty(globalAtom))
extra->createAlias = true;
return;
default:
//MORE: We should be able to hoist constant datasets (e.g., temptables), but it causes problems
//e.g., stops items it contains from being aliased.
candidate = !expr->isConstant();
break;
}
}
}
}
if (candidate && canHoistInvariant(expr))
{
extra->createAlias = true;
return;
}
}
//Not hoisted - continue the analysis within the expression
if (op == no_attr_expr)
analyseChildren(expr);
else
NewHqlTransformer::analyseExpr(expr);
}
#endif
#endif
//Returns true if expr has been flagged for aliasing by the analysis pass, or if its operator is one of
//no_alias, no_getresult, no_getgraphresult or no_getgraphloopresult.
bool TableInvariantTransformer::isAlwaysAlias(IHqlExpression * expr)
{
    if (queryBodyExtra(expr)->createAlias)
        return true;

    const node_operator op = expr->getOperator();
    //The result operators are commoned up in the code generator, so don't do it twice.
    return (op == no_alias) ||
           (op == no_getresult) ||
           (op == no_getgraphresult) ||
           (op == no_getgraphloopresult);
}
//Returns true if expr is so cheap to derive from something that is already aliased that it is not worth
//creating a separate alias for it:
//  * an integer cast (result size 1, 2, 4 or 8) of an integer operand for which isAlwaysAlias() holds
//  * a no_not whose operand satisfies isAlwaysAlias()
bool TableInvariantTransformer::isTrivialAlias(IHqlExpression * expr)
{
    const node_operator op = expr->getOperator();
    if ((op == no_cast) || (op == no_implicitcast))
    {
        //Don't create aliases for items that are simply integral casts of other aliases.
        ITypeInfo * resultType = expr->queryType();
        if (!resultType->isInteger())
            return false;

        IHqlExpression * operand = expr->queryChild(0);
        ITypeInfo * operandType = operand->queryType();
        if (!operandType->isInteger() || !isAlwaysAlias(operand))
            return false;

        switch (resultType->getSize())
        {
        case 1: case 2: case 4: case 8:
            return true;
        }
        return false;
    }

    if (op == no_not)
        return isAlwaysAlias(expr->queryChild(0));

    return false;
}
//Transform pass: wrap every expression flagged by analyseExpr() in an alias carrying the global attribute,
//unless isTrivialAlias() says the alias is not worthwhile.
IHqlExpression * TableInvariantTransformer::createTransformed(IHqlExpression * expr)
{
    if (expr->getOperator() == no_alias)
    {
        //If the aliased value itself transforms into an alias, return that rather than nesting aliases.
        OwnedHqlExpr aliasedValue = transform(expr->queryChild(0));
        if (aliasedValue->getOperator() == no_alias)
            return aliasedValue.getClear();
    }

    OwnedHqlExpr result = NewHqlTransformer::createTransformed(expr);
    const bool wrapInGlobalAlias = queryBodyExtra(expr)->createAlias && !isTrivialAlias(expr);
    if (!wrapInGlobalAlias)
        return result.getClear();

    OwnedHqlExpr globalAttr = createAttribute(globalAtom);
    //Replace an existing alias with its value so the new global alias wraps the value directly.
    if (result->getOperator() == no_alias)
        result.set(result->queryChild(0));
    return createAlias(result->queryBody(), globalAttr);
}
//---------------------------------------------------------------------------
//Analyse expr with a TableInvariantTransformer and return the result of transforming it.
//NOTE(review): the returned expression is presumably linked and owned by the caller - confirm against transformRoot().
IHqlExpression * spotTableInvariant(IHqlExpression * expr)
{
    TableInvariantTransformer invariantSpotter;
    invariantSpotter.analyse(expr, 0);
    return invariantSpotter.transformRoot(expr);
}
//As spotTableInvariant(), but only the children of expr are analysed (expr itself is not), before the
//whole of expr is transformed.
IHqlExpression * spotTableInvariantChildren(IHqlExpression * expr)
{
    TableInvariantTransformer invariantSpotter;
    ForEachChild(childIdx, expr)
    {
        IHqlExpression * child = expr->queryChild(childIdx);
        invariantSpotter.analyse(child, 0);
    }
    return invariantSpotter.transformRoot(expr);
}
//---------------------------------------------------------------------------
//Transformer information object passed to the NewHqlTransformer base class.
static HqlTransformerInfo globalAliasTransformerInfo("GlobalAliasTransformer");

//The transformer starts outside any global alias.
GlobalAliasTransformer::GlobalAliasTransformer()
: NewHqlTransformer(globalAliasTransformerInfo), insideGlobal(false)
{
}
//Analysis pass: for every expression that contains an alias, count how many times it is reached (numUses)
//and record whether it is ever reached from outside a global alias (isOuter).
//insideGlobal is set while the children of an outer alias are being walked, and restored afterwards.
void GlobalAliasTransformer::analyseExpr(IHqlExpression * expr)
{
//Nothing to gather for expressions that do not contain any alias.
if (!containsAlias(expr))
return;
//Remember the current state so it can be restored once the children have been walked.
bool wasInsideGlobal = insideGlobal;
GlobalAliasInfo * extra = queryBodyExtra(expr);
//Every visit is counted, including those that return early below.
extra->numUses++;
if (expr->getOperator() == no_alias)
{
if (expr->hasProperty(globalAtom))
{
// assertex(!containsActiveDataset(expr) || isInlineTrivialDataset(expr));
//A global alias reached from outside any other global alias is an outer alias.
if (!insideGlobal)
extra->isOuter = true;
}
//The children of an alias are only walked on the first visit.
if (extra->numUses > 1)
return;
//Everything beneath an outer alias is analysed as being inside a global.
if (extra->isOuter)
insideGlobal = true;
}
else
{
//ugly, but we need to walk children more than once even if we've already been here.
//What is important is if visited >1 or occur globally, so can short circuit based on that condition.
//This currently links too many times because subsequent cse generation may common up multiple uses of the same item
//but it's not too bad.
//We could rerun this again if that was a major issue.
if (insideGlobal)
{
if (extra->numUses > 2)
return; // may need to visit children more than once so that alias is linked twice.
}
else
{
//Stop once this expression is already known to be outer and has been reached more than twice.
if (extra->isOuter && (extra->numUses > 2))
return;
extra->isOuter = true;
}
}
//no_attr_expr nodes go through analyseChildren(); everything else uses the base class traversal.
if (expr->getOperator() == no_attr_expr)
analyseChildren(expr);
else
NewHqlTransformer::analyseExpr(expr);
//Restore the state that was in force on entry.
insideGlobal = wasInsideGlobal;
}
//Transform pass: use the information gathered by analyseExpr() to simplify aliases.
//  * non-global alias: replaced by its value if used once and not marked internal
//  * global alias that is not outer: replaced by its value if used once, otherwise given the local attribute
//  * global alias that is outer: any local attribute is removed
//Everything else is returned as transformed by the base class.
IHqlExpression * GlobalAliasTransformer::createTransformed(IHqlExpression * expr)
{
    OwnedHqlExpr result = NewHqlTransformer::createTransformed(expr);
    if (expr->getOperator() != no_alias)
        return result.getClear();

    GlobalAliasInfo * info = queryBodyExtra(expr);
    if (!expr->hasProperty(globalAtom))
    {
        if ((info->numUses == 1) && !expr->hasProperty(internalAtom))
            return LINK(result->queryChild(0));
        return result.getClear();
    }

    if (info->isOuter)
    {
        //Should never occur - but just about conceivable that some kind of constant folding
        //might cause a surrounding global alias to be removed.
        if (expr->hasProperty(localAtom))
            return removeLocalAttribute(result);
        return result.getClear();
    }

    if (info->numUses == 1)
        return LINK(result->queryChild(0));
    if (!expr->hasProperty(localAtom))
        return appendLocalAttribute(result);
    return result.getClear();
}
//---------------------------------------------------------------------------
//Run the GlobalAliasTransformer over the children of expr that lie outside the range reported by
//getFirstActivityArgument()/getNumActivityArguments().  Children inside that range are passed through unchanged.
//If none of the children outside the range contains an alias, expr itself is returned (linked).
IHqlExpression * optimizeActivityAliasReferences(IHqlExpression * expr)
{
    if (!containsAlias(expr))
        return LINK(expr);

    const unsigned firstArg = getFirstActivityArgument(expr);
    const unsigned endArg = firstArg + getNumActivityArguments(expr);

    //First check whether there is anything to do, so that the transformer is only created when required.
    bool anyAlias = false;
    ForEachChild(scanIdx, expr)
    {
        IHqlExpression * child = expr->queryChild(scanIdx);
        const bool outsideActivityArgs = (scanIdx < firstArg) || (scanIdx >= endArg);
        if (outsideActivityArgs && containsAlias(child))
        {
            anyAlias = true;
            break;
        }
    }
    if (!anyAlias)
        return LINK(expr);

    //Gather the usage information from all the relevant children before any of them are transformed.
    GlobalAliasTransformer transformer;
    ForEachChild(analyseIdx, expr)
    {
        IHqlExpression * child = expr->queryChild(analyseIdx);
        const bool outsideActivityArgs = (analyseIdx < firstArg) || (analyseIdx >= endArg);
        if (outsideActivityArgs && containsAlias(child))
            transformer.analyse(child, 0);
    }

    //Rebuild the argument list, transforming only the children outside the activity argument range.
    HqlExprArray newArgs;
    ForEachChild(buildIdx, expr)
    {
        IHqlExpression * child = expr->queryChild(buildIdx);
        const bool outsideActivityArgs = (buildIdx < firstArg) || (buildIdx >= endArg);
        if (outsideActivityArgs)
            newArgs.append(*transformer.transformRoot(child));
        else
            newArgs.append(*LINK(child));
    }
    return cloneOrLink(expr, newArgs);
}