/*############################################################################## Copyright (C) 2011 HPCC Systems. All rights reserved. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . ############################################################################## */ #include "platform.h" #include "jlib.hpp" #include "jmisc.hpp" #include "jstream.ipp" #include "hql.hpp" #include "hqliproj.ipp" #include "hqlutil.hpp" #include "hqlcpputil.hpp" #include "hqlthql.hpp" #include "hqlhtcpp.ipp" #include "hqltcppc.ipp" #include "hqlcatom.hpp" #include "hqlfold.hpp" #include "hqlpmap.hpp" #include "hqlopt.hpp" #include "hqlcerrors.hpp" #include "hqlsource.ipp" #include "hqlattr.hpp" #include "jsort.hpp" //#define PRESERVE_TRANSFORM_ANNOTATION // improves the text in the graph, occasionally prevents optimizations. Maybe add on a debug flag. //#define POST_COMMON_ANNOTATION // would be better if this just commoned up transforms... //#define PARANOID_CHECKING //MORE: Extend UsedFieldSet/NestedField so that once all is set it is never modified and then allow a LINK from //a nestedfield to clone. //Problems: // combine the two classes for simplicity // save links in the complexextra rather than objects. // Ensure all functions that modify now return objects + clone if modifying and was all. // Need to implement gathered-enough in a different way so it isn't shared. enum { CostMemoryCopy = 1, CostNetworkGroup = 1, CostGlobalTopN = 2, CostManyCopy = 3, CostNetworkCopy = 10, }; //------------------------------------------------------------------------------------------------- void UsedFieldSet::addUnique(IHqlExpression * field) { //MORE: Add if (!all test to short-circuit contains) if (!contains(*field)) appendField(*LINK(field)); } NestedField * UsedFieldSet::addNested(IHqlExpression * field) { NestedField * match = findNested(field); if (!match) { assertex(originalFields); #ifdef PARANOID_CHECKING assertex(!contains(*field)); assertex(originalFields->contains(*field)); #endif NestedField * original = originalFields->findNested(field); assertex(original); match = new NestedField(field, &original->used); appendNested(*LINK(field), match); } else { assertex(contains(*field)); } return match; } void UsedFieldSet::appendNested(IHqlExpression & ownedField, NestedField * ownedNested) { appendField(ownedField); nested.append(*ownedNested); } bool UsedFieldSet::checkAllFieldsUsed() { if (all) return true; assertex(originalFields); if (fields.ordinality() != originalFields->fields.ordinality()) return false; ForEachItemIn(i, nested) { NestedField & cur = nested.item(i); if (!cur.used.checkAllFieldsUsed()) return false; } all = true; return true; } bool UsedFieldSet::allGathered() const { if (maxGathered == (unsigned)-1) return true; if (maxGathered < fields.ordinality()) return false; ForEachItemIn(i, nested) { if (!nested.item(i).used.allGathered()) return false; } return true; } void UsedFieldSet::appendField(IHqlExpression & ownedField) { #ifdef PARANOID_CHECKING assertex(!contains(ownedField)); #endif #ifdef USE_IPROJECT_HASH if (!all) hash.add(&ownedField); #endif fields.append(ownedField); #ifdef PARANOID_CHECKING if (originalFields) assertex(originalFields->contains(ownedField)); #endif } void UsedFieldSet::clone(const UsedFieldSet & source) { assertex(originalFields == source.originalFields); ForEachItemIn(i, source.fields) appendField(OLINK(source.fields.item(i))); ForEachItemIn(i1, source.nested) nested.append(*source.nested.item(i1).clone()); if (source.all) all = true; finalRecord.set(source.finalRecord); } unsigned UsedFieldSet::getOriginalPosition(IHqlExpression * field) const { assertex(originalFields == this); unsigned match = fields.find(*field); if (match != NotFound) return match; assertex(field->isDatarow()); assertex(finalRecord); OwnedHqlExpr originalField = finalRecord->querySimpleScope()->lookupSymbol(field->queryName()); assertex(originalField && originalField != field); unsigned matchOriginal = fields.find(*originalField); assertex(matchOriginal != NotFound); return matchOriginal; } int UsedFieldSet::compareOrder(IHqlExpression * left, IHqlExpression * right) const { return (int)(getOriginalPosition(left) - getOriginalPosition(right)); } bool UsedFieldSet::contains(IHqlExpression & field) const { if (all) return true; #ifdef USE_IPROJECT_HASH return hash.find(&field) != NULL; #else return fields.contains(field); #endif } bool UsedFieldSet::contains(_ATOM name) const { if (all) return true; ForEachItemIn(i, fields) { if (fields.item(i).queryName() == name) return true; } return false; } //Calculate left - right void UsedFieldSet::createDifference(const UsedFieldSet & left, const UsedFieldSet & right) { if (right.includeAll()) return; //if all are used and non modifyable this code will need changing. ForEachItemIn(i, left.fields) { IHqlExpression & cur = left.fields.item(i); if (cur.isDatarow()) { NestedField * leftNested = left.findNested(&cur); NestedField * rightNested = right.findNested(&cur); assertex(leftNested); if (rightNested) { Owned diffNested = new NestedField(&cur, leftNested->used.queryOriginal()); diffNested->used.createDifference(leftNested->used, rightNested->used); if (!diffNested->isEmpty()) appendNested(OLINK(cur), diffNested.getClear()); } else if (!leftNested->isEmpty()) { appendNested(OLINK(cur), leftNested->clone()); } } else { if (!right.contains(cur)) appendField(OLINK(cur)); } } } NestedField * UsedFieldSet::findNested(IHqlExpression * field) const { ForEachItemIn(i, nested) { NestedField & cur = nested.item(i); if (cur.field == field) return &cur; } return NULL; } IHqlExpression * UsedFieldSet::createFilteredAssign(IHqlExpression * field, IHqlExpression * value, IHqlExpression * newSelf, const UsedFieldSet * exceptions) const { if (!contains(*field)) return NULL; OwnedHqlExpr newValue = LINK(value); OwnedHqlExpr newField; if (field->isDatarow()) { NestedField * match = findNested(field); assertex(match); NestedField * exception = exceptions ? exceptions->findNested(field) : NULL; if (!match->isEmpty()) { bool createSubset = true; if (match->used.checkAllFieldsUsed()) { if (!exception || exception->isEmpty()) { createSubset = false; } else if (exception && exception->used.checkAllFieldsUsed()) { newValue.setown(createNullExpr(field)); createSubset = false; } } if (createSubset) { newField.setown(finalRecord->querySimpleScope()->lookupSymbol(field->queryName())); assertex(newField); assertex(exception || newField != field); //Two options - this is either a no_createrow, and we extract the assignments from the transform //or we create a no_createrow to extract the values from the other record OwnedHqlExpr newTransform; UsedFieldSet * exceptionFields = exception ? &exception->used : NULL; if (value->getOperator() == no_createrow) { newTransform.setown(match->used.createFilteredTransform(value->queryChild(0), exceptionFields)); } else { newTransform.setown(match->used.createRowTransform(value, exceptionFields)); } newValue.setown(createRow(no_createrow, newTransform.getClear())); #if defined(PRESERVE_TRANSFORM_ANNOTATION) newValue.setown(value->cloneAllAnnotations(newValue)); #endif } } else newValue.clear(); } else { if (exceptions && exceptions->contains(*field)) { newValue.setown(createNullExpr(field)); } } if (newValue) { IHqlExpression * rhs = newField ? newField.get() : field; OwnedHqlExpr newLhs = createSelectExpr(LINK(newSelf), LINK(rhs)); return createAssign(newLhs.getClear(), newValue.getClear()); } return NULL; } void UsedFieldSet::createFilteredAssigns(HqlExprArray & assigns, IHqlExpression * transform, IHqlExpression * newSelf, const UsedFieldSet * exceptions) const { ForEachChild(i, transform) { IHqlExpression * cur = transform->queryChild(i); switch (cur->getOperator()) { case no_assign: { IHqlExpression * lhs = cur->queryChild(0); IHqlExpression * field = lhs->queryChild(1); IHqlExpression * value = cur->queryChild(1); IHqlExpression * assign = createFilteredAssign(field, value, newSelf, exceptions); if (assign) assigns.append(*assign); break; } case no_assignall: createFilteredAssigns(assigns, cur, newSelf, exceptions); break; default: assigns.append(*LINK(cur)); break; } } } IHqlExpression * UsedFieldSet::createFilteredTransform(IHqlExpression * transform, const UsedFieldSet * exceptions) const { OwnedHqlExpr self = getSelf(finalRecord); HqlExprArray assigns; createFilteredAssigns(assigns, transform, self, exceptions); OwnedHqlExpr ret = createValue(transform->getOperator(), makeTransformType(finalRecord->getType()), assigns); #if defined(PRESERVE_TRANSFORM_ANNOTATION) return transform->cloneAllAnnotations(ret); #else return ret.getClear(); #endif } IHqlExpression * UsedFieldSet::createRowTransform(IHqlExpression * row, const UsedFieldSet * exceptions) const { OwnedHqlExpr self = getSelf(finalRecord); HqlExprArray assigns; ForEachItemIn(i, fields) { IHqlExpression & field = fields.item(i); OwnedHqlExpr value = createNewSelectExpr(LINK(row), LINK(&field)); OwnedHqlExpr assign = createFilteredAssign(&field, value, self, exceptions); if (assign) assigns.append(*assign.getClear()); } return createValue(no_transform, makeTransformType(finalRecord->getType()), assigns); } void UsedFieldSet::calcFinalRecord(bool canPack, bool ignoreIfEmpty) { assertex(originalFields); if (finalRecord) return; ForEachItemIn(i1, nested) nested.item(i1).used.calcFinalRecord(canPack, true); IHqlExpression * originalRecord = queryOriginalRecord(); if (checkAllFieldsUsed()) { if (canPack) finalRecord.setown(getPackedRecord(originalRecord)); else finalRecord.set(originalRecord); return; } HqlExprArray recordFields; ForEachItemIn(i, fields) { IHqlExpression & cur = fields.item(i); if (cur.isDatarow()) { NestedField * match = findNested(&cur); assertex(match); IHqlExpression * record = cur.queryRecord(); IHqlExpression * newRecord = match->used.queryFinalRecord(); if (record == newRecord) { recordFields.append(OLINK(cur)); } else if (newRecord) { HqlExprArray args; unwindChildren(args, &cur); OwnedHqlExpr newField = createField(cur.queryName(), makeRowType(newRecord->getType()), args); recordFields.append(*newField.getClear()); } } else recordFields.append(OLINK(cur)); } if (originalFields) { //Reorder the record to match the original fields RecordOrderComparer compare(*originalFields); qsortvec((void * *)recordFields.getArray(), recordFields.ordinality(), compare); } if (recordFields.ordinality() == 0) { if (ignoreIfEmpty) return; recordFields.append(*createAttribute(_nonEmpty_Atom)); } finalRecord.setown(createRecord(recordFields)); if (canPack) finalRecord.setown(getPackedRecord(finalRecord)); OwnedHqlExpr serializedRecord = getSerializedForm(finalRecord); if (maxRecordSizeUsesDefault(serializedRecord)) { HqlExprArray recordFields; unwindChildren(recordFields, finalRecord); //Lost some indication of the record size->add an attribute IHqlExpression * max = originalRecord->queryProperty(maxLengthAtom); if (max) recordFields.append(*LINK(max)); else { bool isKnownSize, useDefaultRecordSize; OwnedHqlExpr oldSerializedRecord = getSerializedForm(originalRecord); unsigned oldRecordSize = getMaxRecordSize(oldSerializedRecord, 0, isKnownSize, useDefaultRecordSize); if (!useDefaultRecordSize) recordFields.append(*createAttribute(maxLengthAtom, getSizetConstant(oldRecordSize))); } finalRecord.setown(createRecord(recordFields)); } } void UsedFieldSet::gatherExpandSelectsUsed(HqlExprArray * selfSelects, SelectUsedArray * parentSelects, IHqlExpression * selector, IHqlExpression * source) { assertex(selfSelects ? selector != NULL : true); for (unsigned i1 = maxGathered; i1 < fields.ordinality(); i1++) { IHqlExpression & cur = fields.item(i1); if (!cur.isDatarow()) { if (selfSelects) { OwnedHqlExpr selected = createSelectExpr(LINK(selector), LINK(&cur)); selfSelects->append(*selected.getClear()); } if (parentSelects) { OwnedHqlExpr sourceSelected = createSelectExpr(LINK(source), LINK(&cur)); parentSelects->append(*sourceSelected.getClear()); } } } maxGathered = fields.ordinality(); ForEachItemIn(i2, nested) { NestedField & curNested = nested.item(i2); IHqlExpression * field = curNested.field; OwnedHqlExpr selected = selector ? createSelectExpr(LINK(selector), LINK(field)) : NULL; OwnedHqlExpr sourceSelected = createSelectExpr(LINK(source), LINK(field)); if (!curNested.includeAll()) { curNested.used.gatherExpandSelectsUsed(selfSelects, parentSelects, selected, sourceSelected); sourceSelected.clear(); } else { curNested.used.noteGatheredAll(); if (selfSelects) selfSelects->append(*selected.getClear()); if (parentSelects) parentSelects->append(*sourceSelected.getClear()); } } } inline bool isSelector(IHqlExpression * expr) { return (expr->getOperator() == no_select) && !isNewSelector(expr); } void UsedFieldSet::gatherTransformValuesUsed(HqlExprArray * selfSelects, SelectUsedArray * parentSelects, HqlExprArray * values, IHqlExpression * selector, IHqlExpression * transform) { for (unsigned i = maxGathered; i < fields.ordinality(); i++) { IHqlExpression & cur = fields.item(i); if (!cur.isDatarow()) { if (selfSelects) { OwnedHqlExpr selected = createSelectExpr(LINK(selector), LINK(&cur)); selfSelects->append(*selected.getClear()); } if (values) { IHqlExpression * transformValue = queryTransformAssignValue(transform, &cur); //If no transform value is found then we almost certainly have an invalid query (e.g, LEFT inside a //global). Don't add the value - you'll definitely get a later follow on error assertex(transformValue); values->append(*LINK(transformValue)); } } } maxGathered = fields.ordinality(); ForEachItemIn(i2, nested) { NestedField & curNested = nested.item(i2); if (!curNested.isEmpty() && !curNested.used.allGathered()) { IHqlExpression * field = curNested.field; OwnedHqlExpr selected = selector ? createSelectExpr(LINK(selector), LINK(field)) : NULL; IHqlExpression * transformValue = queryTransformAssignValue(transform, field); assertex(transformValue); bool includeThis = true; if (!curNested.includeAll() && transformValue->isPure()) { if (transformValue->getOperator() == no_createrow) { curNested.used.gatherTransformValuesUsed(selfSelects, parentSelects, values, selected, transformValue->queryChild(0)); includeThis = false; } else if (isAlwaysActiveRow(transformValue) || isSelector(transformValue)) { curNested.used.gatherExpandSelectsUsed(selfSelects, parentSelects, selected, transformValue); includeThis = false; } //otherwise use the whole value. } if (includeThis) { curNested.used.noteGatheredAll(); if (selfSelects) selfSelects->append(*selected.getClear()); if (values) values->append(*LINK(transformValue)); } } } } void UsedFieldSet::getText(StringBuffer & s) const { if (all) s.append("ALL"); s.append("["); ForEachItemIn(i, fields) { IHqlExpression & cur = fields.item(i); if (i) s.append(","); s.append(cur.queryName()); if (cur.isDatarow()) { NestedField * match = findNested(&cur); if (!match->used.checkAllFieldsUsed()) match->used.getText(s); } } s.append("]"); } void UsedFieldSet::intersectFields(const UsedFieldSet & source) { if (source.includeAll()) return; if (includeAll()) set(source); else { finalRecord.clear(); ForEachItemInRev(i1, fields) { IHqlExpression & field = fields.item(i1); if (!field.isDatarow() && !source.contains(field)) { fields.remove(i1); #ifdef USE_IPROJECT_HASH hash.remove(&field); #endif } } ForEachItemInRev(i2, nested) { NestedField & cur = nested.item(i2); NestedField * match = source.findNested(cur.field); //MORE: If we never modify items that have been all set then the following will need changing: if (match) { cur.used.intersectFields(match->used); } else { cur.clear(); } } } } void UsedFieldSet::optimizeFieldsToBlank(const UsedFieldSet & allAssigned, IHqlExpression * transform) { //MORE: //this contains a list of fields that can be blanked instead of assigning. //If there is a sequence of assignments SELF.x := LEFT.x then //a) the the field will already be in the input and output records (since it is a rollup/iterate) //b) if the previous field is assigned, then it may generate more efficient code to also assign this // field rather than blanking it. //Therefore we should walk the transform, and if a field is an exception and previous field is used //and possibly the exception is fixed length, then remove it from the exceptions. } bool UsedFieldSet::requiresFewerFields(const UsedFieldSet & other) const { if (includeAll()) return false; return (fields.ordinality() < other.fields.ordinality()); } void UsedFieldSet::unionFields(const UsedFieldSet & source) { if (includeAll()) return; if (source.includeAll()) set(source); else { ForEachItemIn(i, source.fields) { IHqlExpression & field = source.fields.item(i); if (!contains(field)) appendField(OLINK(field)); } ForEachItemIn(i1, source.nested) { NestedField & cur = source.nested.item(i1); NestedField * match = findNested(cur.field); if (match) match->used.unionFields(cur.used); else nested.append(*cur.clone()); } } } bool UsedFieldSet::isEmpty() const { ForEachItemIn(i1, fields) { IHqlExpression & cur = fields.item(i1); if (!cur.isDatarow()) return false; } ForEachItemIn(i2, nested) { if (!nested.item(i2).isEmpty()) return false; } return true; } void UsedFieldSet::kill() { #ifdef USE_IPROJECT_HASH hash.kill(); #endif fields.kill(); nested.kill(); all = false; maxGathered = 0; finalRecord.clear(); } void UsedFieldSet::set(const UsedFieldSet & source) { kill(); clone(source); } void UsedFieldSet::setAll() { if (all) return; assertex(originalFields); kill(); clone(*originalFields); } void UsedFieldSet::setRecord(IHqlExpression * record) { assertex(fields.ordinality() == 0); all = true; unwindFields(fields, record); ForEachItemIn(i, fields) { IHqlExpression & cur = fields.item(i); if (cur.isDatarow()) { NestedField * child = new NestedField(&cur, NULL); child->used.setRecord(cur.queryRecord()); nested.append(*child); } } finalRecord.set(record->queryBody()); originalFields = this; } static UsedFieldSet * addNestedField(UsedFieldSet & fields, IHqlExpression * expr, IHqlExpression * selector) { if (expr == selector) return &fields; IHqlExpression * ds = expr->queryChild(0); UsedFieldSet * parent = addNestedField(fields, ds, selector); if (parent) { NestedField * nested = parent->addNested(expr->queryChild(1)); if (!nested || nested->includeAll()) return NULL; return &nested->used; } return NULL; } bool processMatchingSelector(UsedFieldSet & fields, IHqlExpression * select, IHqlExpression * selector) { if (select == selector) { fields.setAll(); return true; } if (select->getOperator() != no_select) return false; //Could be .blah.ds - queryDatasetSelector needs to be applied to the lhs. IHqlExpression * root = queryDatasetCursor(select->queryChild(0)); if (root == selector) { if (select->isDatarow()) { UsedFieldSet * nested = addNestedField(fields, select, selector); if (nested) nested->setAll(); } else { IHqlExpression * ds = select->queryChild(0); IHqlExpression * field = select->queryChild(1); UsedFieldSet * nested = addNestedField(fields, ds, selector); if (nested) nested->addUnique(field); } } return false; } //--------------------------------------------------------------------------------------------------------------------- int RecordOrderComparer::docompare(const void * l,const void * r) const { IHqlExpression * lExpr = (IHqlExpression *)l; IHqlExpression * rExpr = (IHqlExpression *)r; return fields.compareOrder(lExpr, rExpr); } //--------------------------------------------------------------------------------------------------------------------- static unsigned getActivityCost(IHqlExpression * expr, ClusterType targetClusterType) { switch (targetClusterType) { case ThorCluster: case ThorLCRCluster: { switch (expr->getOperator()) { case no_sort: //MORE: What about checking for grouped! if (!expr->hasProperty(localAtom)) return CostNetworkCopy; return CostManyCopy; case no_shuffle: if (!expr->hasProperty(localAtom) && !isGrouped(expr)) return CostNetworkCopy; break; case no_group: if (!expr->hasProperty(localAtom)) return CostNetworkGroup; break; case no_keyeddistribute: case no_distribute: case no_cosort: return CostNetworkCopy; case no_topn: if (!expr->hasProperty(localAtom)) return CostGlobalTopN; break; case no_selfjoin: if (!expr->hasProperty(localAtom)) return CostNetworkCopy; break; case no_denormalize: case no_denormalizegroup: case no_join: case no_joincount: if (!expr->hasProperty(localAtom)) { if (isKeyedJoin(expr)) break; if (expr->hasProperty(lookupAtom)) return CostNetworkCopy/2; //insert on rhs. return CostNetworkCopy; } break; //case no_dedup: all non local, may be worth it.. } } } return 0; } //MORE: Should cache this in the extra for a record, quite possibly with the unwound fields as well. bool isSensibleRecord(IHqlExpression * record) { ForEachChild(i, record) { IHqlExpression * cur = record->queryChild(i); switch (cur->getOperator()) { case no_record: if (!isSensibleRecord(cur)) return false; break; case no_ifblock: return false; case no_field: //Could loosen this condition so that it didn't use any fields within the record. switch (cur->queryType()->getTypeCode()) { case type_alien: return false; case type_table: case type_groupedtable: { //disqualify datasets with no_selfref counts/lengths IHqlExpression * limit = cur->queryProperty(countAtom); if (!limit) limit = cur->queryProperty(sizeAtom); if (limit && !limit->isConstant()) return false; break; } } break; } } return true; } IHqlExpression * queryRootSelector(IHqlExpression * select) { loop { if (select->hasProperty(newAtom)) return select; IHqlExpression * ds = select->queryChild(0); if (ds->getOperator() != no_select) return select; select = ds; } } static node_operator queryCompoundOp(IHqlExpression * expr) { switch (expr->getOperator()) { case no_table: return no_compound_diskread; case no_newkeyindex: return no_compound_indexread; case no_dataset_alias: case no_preservemeta: return queryCompoundOp(expr->queryChild(0)); } throwUnexpectedOp(expr->getOperator()); } static int compareHqlExprPtr(IInterface * * left, IInterface * * right) { return *left == *right ? 0 : *left < *right ? -1 : +1; } //------------------------------------------------------------------------ ImplicitProjectInfo::ImplicitProjectInfo(IHqlExpression * _original, ProjectExprKind _kind) : NewTransformInfo(_original), kind(_kind) { visited = false; gatheredSelectsUsed = false; //The following logically belong to the complexProjectInfo, see note in header canOptimize = true; insertProject = false; alreadyInScope = false; canReorderOutput = true; calcedReorderOutput = false; visitedAllowingActivity = false; } void ImplicitProjectInfo::addActiveSelect(IHqlExpression * select) { if (selectsUsed.find(*select) == NotFound) selectsUsed.append(*select); } void ImplicitProjectInfo::addActiveSelects(const SelectUsedArray & src) { unsigned numSrc = src.ordinality(); if (numSrc == 0) return; if (selectsUsed.ordinality() == 0) { //No need to check for pre-existence, can be significant selectsUsed.ensure(numSrc); for (unsigned i=0; i < numSrc; i++) selectsUsed.append(src.item(i)); } else { //MORE: Should only check if exists in pre-existing selects otherwise O(N^2) in items added for (unsigned i=0; i < numSrc; i++) addActiveSelect(&src.item(i)); } } void ImplicitProjectInfo::removeProductionSelects() { ForEachItemInRev(i, selectsUsed) { IHqlExpression & cur = selectsUsed.item(i); if ((cur.getOperator() == no_matchattr) || (cur.queryChild(0)->getOperator() == no_matchattr)) selectsUsed.remove(i); } } void ImplicitProjectInfo::removeScopedFields(IHqlExpression * selector) { ForEachItemInRev(i, selectsUsed) { IHqlExpression & cur = selectsUsed.item(i); if ((&cur == selector) || ((cur.getOperator() == no_select) && (queryDatasetCursor(cur.queryChild(0)) == selector))) selectsUsed.remove(i); } } void ImplicitProjectInfo::removeRowsFields(IHqlExpression * expr, IHqlExpression * left, IHqlExpression * right) { node_operator rowsSide = queryHasRows(expr); if (rowsSide == no_none) return; IHqlExpression * rowsid = expr->queryProperty(_rowsid_Atom); switch (rowsSide) { case no_left: { OwnedHqlExpr rowsExpr = createDataset(no_rows, LINK(left), LINK(rowsid)); removeScopedFields(rowsExpr); break; } case no_right: { OwnedHqlExpr rowsExpr = createDataset(no_rows, LINK(right), LINK(rowsid)); removeScopedFields(rowsExpr); break; } default: throwUnexpectedOp(rowsSide); } } //------------------------------------------------------------------------ ComplexImplicitProjectInfo::ComplexImplicitProjectInfo(IHqlExpression * _original, ProjectExprKind _kind) : ImplicitProjectInfo(_original, _kind) { } void ComplexImplicitProjectInfo::addAllOutputs() { outputFields.setAll(); } IHqlExpression * ComplexImplicitProjectInfo::createOutputProject(IHqlExpression * ds) { if (ds->getOperator() == no_null) return createDataset(no_null, LINK(queryOutputRecord())); OwnedHqlExpr seq = createSelectorSequence(); OwnedHqlExpr left = createSelector(no_left, ds, seq); OwnedHqlExpr self = getSelf(queryOutputRecord()); IHqlExpression * transform = createMappingTransform(self, left); if (ds->isDataset()) return createDataset(no_hqlproject, LINK(ds), createComma(transform, LINK(seq))); return createRow(no_projectrow, LINK(ds), createComma(transform, LINK(seq))); } void ComplexImplicitProjectInfo::finalizeOutputRecord() { //MORE: Create them in the same order as the original record + don't change if numOutputFields = numOriginalOutputFields if (!queryOutputRecord()) { bool canPack = (safeToReorderOutput() && okToOptimize()); outputFields.calcFinalRecord(canPack, false); } } unsigned ComplexImplicitProjectInfo::queryCostFactor(ClusterType targetClusterType) { //MORE: Could cache the value, but this option isn't really used, and not called a lot. return getActivityCost(original, targetClusterType); } void ComplexImplicitProjectInfo::stopOptimizeCompound(bool cascade) { if (cascade) { canOptimize = false; ForEachItemIn(i, inputs) inputs.item(i).stopOptimizeCompound(cascade); } else if (kind == CompoundableActivity) canOptimize = false; } void ComplexImplicitProjectInfo::trace() { StringBuffer s; if (original->queryName()) s.append(original->queryName()).append(" := "); s.append(getOpString(original->getOperator())); DBGLOG("%s", s.str()); switch (getChildDatasetType(original)) { case childdataset_none: case childdataset_many_noscope: case childdataset_many: case childdataset_if: case childdataset_case: case childdataset_map: case childdataset_dataset_noscope: case childdataset_nway_left_right: break; case childdataset_dataset: case childdataset_datasetleft: case childdataset_top_left_right: case childdataset_same_left_right: trace("input", leftFieldsRequired); break; case childdataset_left: trace("left", leftFieldsRequired); break; case childdataset_leftright: trace("left", leftFieldsRequired); trace("right", rightFieldsRequired); break; } trace("output", outputFields); } void ComplexImplicitProjectInfo::trace(const char * label, const UsedFieldSet & fields) { StringBuffer s; s.append(" ").append(label).append(": "); fields.getText(s); DBGLOG("%s", s.str()); } void ComplexImplicitProjectInfo::inheritRequiredFields(const UsedFieldSet & requiredList) { //Temporary code to avoid a check. It is permissible for the fields of an AnyTypeActivity to not match if ((activityKind() == AnyTypeActivity) && !outputFields.includeAll() && requiredList.includeAll()) outputFields.setOriginal(requiredList.queryOriginal()); outputFields.unionFields(requiredList); } void ComplexImplicitProjectInfo::notifyRequiredFields(ComplexImplicitProjectInfo * whichInput) { if (activityKind() == PassThroughActivity) { whichInput->inheritRequiredFields(outputFields); } else if ((activityKind() == RollupTransformActivity) || (activityKind() == IterateTransformActivity)) { whichInput->inheritRequiredFields(leftFieldsRequired); whichInput->inheritRequiredFields(rightFieldsRequired); } else if (original->getOperator() == no_fetch) { assertex(whichInput == &inputs.item(0)); whichInput->inheritRequiredFields(rightFieldsRequired); } else if (whichInput == &inputs.item(0)) { whichInput->inheritRequiredFields(leftFieldsRequired); //can occur if same dataset is used for left and right - e.g., non-symmetric self join if ((inputs.ordinality() > 1) && (whichInput == &inputs.item(1))) whichInput->inheritRequiredFields(rightFieldsRequired); } else if (whichInput == &inputs.item(1)) { whichInput->inheritRequiredFields(rightFieldsRequired); } else if (inputs.contains(*whichInput)) whichInput->addAllOutputs(); else throwUnexpected(); } bool ComplexImplicitProjectInfo::safeToReorderOutput() { if (!calcedReorderOutput) { canReorderOutput = true; switch (activityKind()) { case FixedInputActivity: //can occur with weird operations in the middle of a dataset. Should probably only set if an action. canReorderOutput = false; break; default: ForEachItemIn(i, outputs) { if (!outputs.item(i).safeToReorderInput()) { canReorderOutput = false; break; } } break; } calcedReorderOutput = true; } return canReorderOutput; } bool ComplexImplicitProjectInfo::safeToReorderInput() { switch (activityKind()) { case CreateRecordActivity: case CreateRecordLRActivity: case ScalarSelectActivity: //These activities have remove the constraints of the inputs on their outputs. return true; case FixedInputActivity: return false; } return safeToReorderOutput(); } void ComplexImplicitProjectInfo::setMatchingOutput(ComplexImplicitProjectInfo * other) { assertex(other->queryOutputRecord()); outputFields.set(other->outputFields); } //----------------------------------------------------------------------------------------------- static HqlTransformerInfo implicitProjectTransformerInfo("ImplicitProjectTransformer"); ImplicitProjectTransformer::ImplicitProjectTransformer(HqlCppTranslator & _translator, bool _optimizeSpills) : NewHqlTransformer(implicitProjectTransformerInfo), translator(_translator) { const HqlCppOptions & transOptions = translator.queryOptions(); targetClusterType = translator.getTargetClusterType(); options.isRoxie = (targetClusterType == RoxieCluster); options.optimizeProjectsPreservePersists = transOptions.optimizeProjectsPreservePersists; options.autoPackRecords = transOptions.autoPackRecords; options.notifyOptimizedProjects = translator.notifyOptimizedProjectsLevel(); options.optimizeSpills = _optimizeSpills; options.enableCompoundCsvRead = translator.queryOptions().enableCompoundCsvRead; options.projectNestedTables = translator.queryOptions().projectNestedTables; allowActivity = true; options.insertProjectCostLevel = 0; if (transOptions.reduceNetworkTraffic) options.insertProjectCostLevel = (transOptions.insertProjectCostLevel != (unsigned)-1) ? transOptions.insertProjectCostLevel : CostNetworkCopy; } void ImplicitProjectTransformer::analyseExpr(IHqlExpression * expr) { ImplicitProjectInfo * extra = queryBodyExtra(expr); ComplexImplicitProjectInfo * complexExtra = extra->queryComplexInfo(); if (complexExtra) { if (complexExtra->alreadyInScope) return; if (!options.autoPackRecords) complexExtra->setReorderOutput(false); if (extra->checkAlreadyVisited()) { //Don't allow modification if referenced from activity and non-activity context if (allowActivity) { if ((extra->activityKind() != NonActivity) || (expr->getOperator() == no_record)) return; //either allowed before, but tagged as a non //If previously this was called in an allowactivity context it must have been explicitly disabled, so no point recursing. if (complexExtra->visitedAllowingActivity) return; //otherwise, probably worth recursing again... extra->preventOptimization(); } else { extra->preventOptimization(); return; } } if (allowActivity) complexExtra->visitedAllowingActivity = true; } else { if (extra->checkAlreadyVisited()) return; } node_operator op = expr->getOperator(); switch (op) { case no_record: { complexExtra->outputFields.setRecord(expr); return; } case no_constant: case no_attr: return; case no_transform: case no_newtransform: case no_transformlist: case no_list: if (expr->isConstant()) return; break; } ITypeInfo * type = expr->queryType(); if (allowActivity) { switch (op) { case no_evaluate: throwUnexpected(); case no_select: if (expr->isDataset() || expr->isDatarow()) { //MORE: These means that selects from a parent dataset don't project down the parent dataset. //I'm not sure how big an issue that would be. allowActivity = false; Parent::analyseExpr(expr); allowActivity = true; assertex(extra->activityKind() == SourceActivity); activities.append(*LINK(expr)); IHqlExpression * record = expr->queryRecord(); complexExtra->setOriginalRecord(queryBodyComplexExtra(record)); analyseExpr(record); } else if (isNewSelector(expr)) { Parent::analyseExpr(expr); assertex(extra->activityKind() == ScalarSelectActivity); if (expr->hasProperty(newAtom)) connect(expr->queryChild(0), expr); activities.append(*LINK(expr)); } gatherFieldsUsed(expr, extra); return; case no_activerow: assertex(extra->activityKind() == SimpleActivity); allowActivity = false; Parent::analyseExpr(expr); allowActivity = true; activities.append(*LINK(expr)); gatherFieldsUsed(expr, extra); return; case no_attr: case no_attr_expr: case no_attr_link: allowActivity = false; Parent::analyseExpr(expr); allowActivity = true; return; case no_thor: if (expr->isDataset() || expr->isDatarow()) { assertex(extra->activityKind() == SimpleActivity); Parent::analyseExpr(expr); connect(expr->queryChild(0), expr); } else { assertex(extra->activityKind() == NonActivity); Parent::analyseExpr(expr); } break; case no_compound: if (expr->isDataset()) { assertex(extra->activityKind() == SimpleActivity); Parent::analyseExpr(expr); connect(expr->queryChild(1), expr); break; } assertex(extra->activityKind() == NonActivity); Parent::analyseExpr(expr); break; case no_executewhen: if (expr->isDataset()) { assertex(extra->activityKind() == SimpleActivity); Parent::analyseExpr(expr); connect(expr->queryChild(0), expr); break; } assertex(extra->activityKind() == NonActivity); Parent::analyseExpr(expr); break; case no_subgraph: assertex(extra->activityKind() == NonActivity); Parent::analyseExpr(expr); break; case no_libraryselect: assertex(extra->activityKind() == SourceActivity); analyseExpr(expr->queryChild(1)); break; case no_libraryscopeinstance: { assertex(extra->activityKind() == NonActivity); ForEachChild(i, expr) { IHqlExpression * cur = expr->queryChild(i); if (cur->isDataset()) { analyseExpr(cur); queryBodyExtra(cur)->preventOptimization(); } } break; } case no_mergejoin: case no_nwayjoin: // could probably project output of this one... case no_nwaymerge: { assertex(extra->activityKind() == SourceActivity); //Don't allow any of the inputs to be optimized - otherwise the can end up with inconsistent record types allowActivity = false; Parent::analyseExpr(expr->queryChild(0)); allowActivity = true; break; } case no_setresult: case no_ensureresult: { IHqlExpression * value = expr->queryChild(0); if (value->isDataset() || value->isDatarow())// || value->isList()) { assertex(extra->activityKind() == FixedInputActivity); analyseExpr(value); //no need to analyse other fields since they are all constant connect(value, expr); } else { assertex(extra->activityKind() == NonActivity); Parent::analyseExpr(expr); } break; } case no_newtransform: case no_transform: case no_transformlist: assertex(extra->kind == NonActivity); if (!expr->isConstant()) Parent::analyseExpr(expr); return; default: { unsigned numArgs = expr->numChildren(); unsigned first = 0; unsigned last = numArgs; unsigned start = 0; if (!expr->isAction() && !expr->isDataset() && !expr->isDatarow()) { switch (op) { case NO_AGGREGATE: case no_createset: last = 1; break; case no_sizeof: last = 0; break; default: extra->kind = NonActivity; break; } } else { IHqlExpression * record = expr->queryRecord(); if (!record && expr->queryChild(0)) record = expr->queryChild(0)->queryRecord(); if (!record || !isSensibleRecord(record)) extra->preventOptimization(); first = getFirstActivityArgument(expr); last = first + getNumActivityArguments(expr); switch (expr->getOperator()) { case no_dedup: if (dedupMatchesWholeRecord(expr)) extra->preventOptimization(); break; case no_process: extra->preventOptimization(); break; case no_executewhen: last = 1; break; case no_newkeyindex: // case no_dataset: //No point walking the transform for an index start = 3; numArgs = 4; break; case no_compound_diskaggregate: case no_compound_diskcount: case no_compound_diskgroupaggregate: case no_compound_indexaggregate: case no_compound_indexcount: case no_compound_indexgroupaggregate: //walk inside these... they're not compoundable, but they may be able to lose some fields from the transform. last = 1; break; } } for (unsigned i =start; i < numArgs; i++) { IHqlExpression * cur = expr->queryChild(i); allowActivity = (i >= first) && (i < last); analyseExpr(cur); if (allowActivity) { if (extra->kind == NonActivity) { ImplicitProjectInfo * childExtra = queryBodyExtra(cur); childExtra->preventOptimization(); } else if (!cur->isAction() && !cur->isAttribute()) { connect(cur, expr); } } } if (extra->kind == NonActivity) gatherFieldsUsed(expr, extra); allowActivity = true; } } } else { extra->preventOptimization(); switch (op) { case no_attr_expr: analyseChildren(expr); break; case no_newkeyindex: // case no_sizeof: //no point analysing parameters to keyed joins break; default: Parent::analyseExpr(expr); break; } } //Add activities in depth first order, so traversing them backwards is guaranteed to be top down. if (extra->activityKind() != NonActivity) { assertex(complexExtra); switch (extra->activityKind()) { case CreateRecordActivity: case CreateRecordLRActivity: case RollupTransformActivity: case IterateTransformActivity: case DenormalizeActivity: case CreateRecordSourceActivity: if (hasUnknownTransform(expr)) complexExtra->preventOptimization(); break; } activities.append(*LINK(expr)); IHqlExpression * child = expr->queryChild(0); switch (extra->activityKind()) { case CreateRecordActivity: setOriginal(complexExtra->leftFieldsRequired, child); break; case CreateRecordLRActivity: setOriginal(complexExtra->leftFieldsRequired, child); setOriginal(complexExtra->rightFieldsRequired, expr->queryChild(1)); break; case CompoundActivity: case CompoundableActivity: case CreateRecordSourceActivity: case AnyTypeActivity: break; case RollupTransformActivity: case IterateTransformActivity: setOriginal(complexExtra->leftFieldsRequired, child); setOriginal(complexExtra->rightFieldsRequired, child); break; case DenormalizeActivity: setOriginal(complexExtra->leftFieldsRequired, child); setOriginal(complexExtra->rightFieldsRequired, expr->queryChild(1)); break; case FixedInputActivity: assertex(child && (child->isDataset() || child->isDatarow())); setOriginal(complexExtra->leftFieldsRequired, child); if (getNumChildTables(expr) >= 2) setOriginal(complexExtra->rightFieldsRequired, expr->queryChild(1)); break; case SourceActivity: case PassThroughActivity: case ScalarSelectActivity: break; case SinkActivity: setOriginal(complexExtra->leftFieldsRequired, child); break; case SimpleActivity: if (expr->getOperator() == no_compound) setOriginal(complexExtra->leftFieldsRequired, expr->queryChild(1)); else setOriginal(complexExtra->leftFieldsRequired, child); break; default: throwUnexpected(); } } IHqlExpression * record = expr->queryRecord(); if (record && !isPatternType(type) && !expr->isTransform()) { assertex(complexExtra); complexExtra->setOriginalRecord(queryBodyComplexExtra(record)); analyseExpr(record); } gatherFieldsUsed(expr, extra); } void ImplicitProjectTransformer::connect(IHqlExpression * source, IHqlExpression * sink) { queryBodyComplexExtra(source)->outputs.append(*queryBodyComplexExtra(sink)); queryBodyComplexExtra(sink)->inputs.append(*queryBodyComplexExtra(source)); } //NB: This is very similar to the code in CHqlExpression::cacheTablesUsed() void ImplicitProjectTransformer::gatherFieldsUsed(IHqlExpression * expr, ImplicitProjectInfo * extra) { if (extra->checkGatheredSelects()) return; node_operator op = expr->getOperator(); switch (op) { case no_select: { if (options.projectNestedTables) { bool isNew; IHqlExpression * ds = querySelectorDataset(expr, isNew); if (isNew) inheritActiveFields(extra, ds); else extra->addActiveSelect(expr); } else { //Either inherit from the dataset if new, or add the root field (x.a.b only adds x.a) IHqlExpression * cur = expr; loop { IHqlExpression * ds = cur->queryChild(0); if (cur->hasProperty(newAtom)) { inheritActiveFields(extra, ds); break; } node_operator dsOp = ds->getOperator(); if (dsOp != no_select || ds->isDataset()) { if ((dsOp != no_self) && (dsOp != no_selfref)) extra->addActiveSelect(cur); break; } cur = ds; } } break; } case no_activerow: //active row used in some context extra->addActiveSelect(expr->queryChild(0)); break; case no_left: case no_right: extra->addActiveSelect(expr); //left/right used in an expression context - assume the worse.. break; case no_attr: case no_attr_link: case no_getresult: break; case no_attr_expr: { _ATOM name = expr->queryName(); if (name != _selectors_Atom) inheritActiveFields(expr, extra, 0, expr->numChildren()); } break; case no_newkeyindex: { #ifdef _DEBUG inheritActiveFields(expr, extra, 1, expr->numChildren()); extra->removeScopedFields(expr->queryChild(0)->queryNormalizedSelector()); extra->removeScopedFields(queryActiveTableSelector()); // for distributed() etc, inheritActiveFields(expr, extra, 0, 1); const SelectUsedArray & selectsUsed = extra->querySelectsUsed(); if (selectsUsed.ordinality() != 0) { StringBuffer s; ForEachItemIn(i, selectsUsed) { if (i) s.append(','); getExprECL(&selectsUsed.item(i), s); } throwError1(HQLERR_IndexHasActiveFields, s.str()); } #else inheritActiveFields(expr, extra, 3, 4); // just in case the filename is based on a parent row??? #endif break; } case no_pat_production: { inheritActiveFields(expr, extra, 0, expr->numChildren()); extra->removeProductionSelects(); break; } case no_assign: inheritActiveFields(expr, extra, 1, 2); break; /* The following can be handled using the default mechanism because we're not tracking newtables. case NO_AGGREGATE: case no_createset: */ case no_table: { inheritActiveFields(expr, extra, 0, expr->numChildren()); IHqlExpression * parent = expr->queryChild(3); if (parent) extra->removeScopedFields(parent->queryNormalizedSelector()); break; } default: { #if 0 //Optimization to enable later - if no active datasets, then can't have any active fields. //should save some processing on root datasets, but may be insignificant if (isIndependentOfScope(expr)) break; #endif unsigned max = expr->numChildren(); IHqlExpression * ds = expr->queryChild(0); switch (getChildDatasetType(expr)) { case childdataset_none: case childdataset_many_noscope: case childdataset_if: case childdataset_case: case childdataset_map: case childdataset_dataset_noscope: inheritActiveFields(expr, extra, 0, max); //None of these have any scoped arguments, so no need to remove them break; case childdataset_many: { unsigned firstAttr = getNumChildTables(expr); inheritActiveFields(expr, extra, firstAttr, max); extra->removeScopedFields(queryActiveTableSelector()); inheritActiveFields(expr, extra, 0, firstAttr); break; } case childdataset_dataset: { inheritActiveFields(expr, extra, 1, max); extra->removeScopedFields(ds->queryNormalizedSelector()); inheritActiveFields(expr, extra, 0, 1); } break; case childdataset_datasetleft: { OwnedHqlExpr left = createSelector(no_left, ds, querySelSeq(expr)); inheritActiveFields(expr, extra, 1, max); extra->removeScopedFields(left); extra->removeScopedFields(ds->queryNormalizedSelector()); extra->removeRowsFields(expr, left, NULL); inheritActiveFields(expr, extra, 0, 1); break; } case childdataset_left: { OwnedHqlExpr left = createSelector(no_left, ds, querySelSeq(expr)); inheritActiveFields(expr, extra, 1, max); extra->removeScopedFields(left); extra->removeRowsFields(expr, left, NULL); inheritActiveFields(expr, extra, 0, 1); break; } case childdataset_same_left_right: case childdataset_nway_left_right: { IHqlExpression * seq = querySelSeq(expr); OwnedHqlExpr left = createSelector(no_left, ds, seq); OwnedHqlExpr right = createSelector(no_right, ds, seq); inheritActiveFields(expr, extra, 1, max); extra->removeScopedFields(left); extra->removeScopedFields(right); extra->removeRowsFields(expr, left, right); inheritActiveFields(expr, extra, 0, 1); break; } case childdataset_top_left_right: { IHqlExpression * seq = querySelSeq(expr); OwnedHqlExpr left = createSelector(no_left, ds, seq); OwnedHqlExpr right = createSelector(no_right, ds, seq); inheritActiveFields(expr, extra, 1, max); extra->removeScopedFields(ds->queryNormalizedSelector()); extra->removeScopedFields(left); extra->removeScopedFields(right); extra->removeRowsFields(expr, left, right); inheritActiveFields(expr, extra, 0, 1); break; } case childdataset_leftright: { IHqlExpression * leftDs = expr->queryChild(0); IHqlExpression * rightDs = expr->queryChild(1); IHqlExpression * seq = querySelSeq(expr); OwnedHqlExpr left = createSelector(no_left, leftDs, seq); OwnedHqlExpr right = createSelector(no_right, rightDs, seq); inheritActiveFields(expr, extra, 2, max); extra->removeScopedFields(right); extra->removeRowsFields(expr, left, right); if (expr->getOperator() == no_normalize) { inheritActiveFields(expr, extra, 1, 2); extra->removeScopedFields(left); inheritActiveFields(expr, extra, 0, 1); } else { extra->removeScopedFields(left); inheritActiveFields(expr, extra, 0, 2); } break; } break; case childdataset_evaluate: //handled elsewhere... default: UNIMPLEMENTED; } switch (op) { case no_newparse: case no_parse: extra->removeProductionSelects(); break; } #ifdef _DEBUG //MORE: This doesn't currently cope with access to parents within normalized child datasets //e.g, sqnormds1.hql. const SelectUsedArray & selectsUsed = extra->querySelectsUsed(); if (isIndependentOfScope(expr) && selectsUsed.ordinality() != 0) { switch (expr->getOperator()) { case no_csv: case no_xml: break; default: { StringBuffer s; ForEachItemIn(i, selectsUsed) { if (i) s.append(','); getExprECL(&selectsUsed.item(i), s); } throwError1(HQLERR_GlobalHasActiveFields, s.str()); } } } #endif break; } } } const SelectUsedArray & ImplicitProjectTransformer::querySelectsUsed(IHqlExpression * expr) { ImplicitProjectInfo * extra = queryBodyExtra(expr); // gatherFieldsUsed(expr, extra); return extra->querySelectsUsed(); } ProjectExprKind ImplicitProjectTransformer::getProjectExprKind(IHqlExpression * expr) { switch (expr->getOperator()) { case no_evaluate: throwUnexpected(); case no_select: if (expr->isDataset() || expr->isDatarow()) return SourceActivity; if (isNewSelector(expr)) return ScalarSelectActivity; return NonActivity; case no_activerow: return SimpleActivity; case no_attr: case no_attr_expr: case no_attr_link: return NonActivity; case no_typetransfer: if (expr->isDataset() || expr->isDatarow()) return SourceActivity; return NonActivity; case no_thor: if (expr->isDataset() || expr->isDatarow()) return SimpleActivity; return NonActivity; case no_compound: if (expr->isDataset()) return SimpleActivity; if (expr->isDatarow()) return ComplexNonActivity; return NonActivity; case no_executewhen: if (expr->isDataset() || expr->isDatarow()) return SimpleActivity; return NonActivity; case no_subgraph: case no_libraryscopeinstance: return NonActivity; case no_mergejoin: case no_nwayjoin: // could probably project output of this one... case no_nwaymerge: case no_libraryselect: return SourceActivity; case no_setresult: case no_ensureresult: { IHqlExpression * value = expr->queryChild(0); if (value->isDataset() || value->isDatarow()) return FixedInputActivity; return NonActivity; } case no_newrow: //only used while transforming case no_newaggregate: case no_hqlproject: case no_normalize: case no_newusertable: case no_newparse: case no_newxmlparse: case no_createrow: case no_rollupgroup: case no_projectrow: return CreateRecordActivity; case no_inlinetable: case no_dataset_from_transform: return CreateRecordSourceActivity; case no_extractresult: case no_apply: return SinkActivity; case no_denormalizegroup: case no_join: case no_fetch: return CreateRecordLRActivity; case no_process: // optimization currently disabled... return PassThroughActivity; case no_iterate: return IterateTransformActivity; case no_rollup: return RollupTransformActivity; case no_denormalize: return DenormalizeActivity; case no_null: if (expr->isAction()) return NonActivity; return AnyTypeActivity; case no_skip: case no_fail: if (expr->isDataset() || expr->isDatarow()) return AnyTypeActivity; return NonActivity; case no_table: switch (expr->queryChild(2)->getOperator()) { case no_thor: case no_flat: if (expr->hasProperty(_spill_Atom) && options.isRoxie) return SourceActivity; if (options.optimizeProjectsPreservePersists) { //Don't project persists because it can mess up the redistibution code. if (expr->hasProperty(_workflowPersist_Atom)) return SourceActivity; } return CompoundableActivity; case no_csv: if (options.enableCompoundCsvRead) return CompoundableActivity; return SourceActivity; default: return SourceActivity; } case no_newkeyindex: //Not compoundable for the moment - because it causes problems with assertwild(all, ) return SourceActivity;//CompoundableActivity; case no_compound_diskread: case no_compound_disknormalize: case no_compound_indexread: case no_compound_indexnormalize: { if (options.optimizeProjectsPreservePersists) { //Don't project persists because it can mess up the redistibution code. IHqlExpression * root = queryRoot(expr); if (root && root->hasProperty(_workflowPersist_Atom)) return SourceActivity; } return CompoundActivity; } case no_compound_diskaggregate: case no_compound_diskcount: case no_compound_diskgroupaggregate: case no_compound_indexaggregate: case no_compound_indexcount: case no_compound_indexgroupaggregate: //Don't want to add projects to these... return SimpleActivity; case no_preload: case no_forcelocal: case no_workunit_dataset: case no_getgraphresult: case no_getgraphloopresult: case no_rows: return SourceActivity; case no_allnodes: case no_httpcall: case no_soapcall: case no_newsoapcall: case no_libraryinput: case no_thisnode: if (expr->isDataset() || expr->isDatarow()) return SourceActivity; return NonActivity; case no_pipe: case no_nofold: case no_nohoist: if (expr->isDataset() || expr->isDatarow()) return FixedInputActivity; return NonActivity; case no_soapcall_ds: case no_newsoapcall_ds: case no_output: case no_distribution: case no_buildindex: case no_spill: case no_setgraphresult: case no_setgraphloopresult: case no_spillgraphresult: //MORE: Rethink these later: case no_combine: case no_combinegroup: case no_regroup: case no_loop: case no_graphloop: case no_filtergroup: //anything else would be tricky... case no_normalizegroup: return FixedInputActivity; case no_aggregate: if (expr->hasProperty(mergeTransformAtom)) return FixedInputActivity; return FixedInputActivity; //MORE:???? Should be able to optimize this case no_fromxml: // A bit bit like a source activity, no transform..., but has an input return SourceActivity; case no_selfjoin: return CreateRecordActivity; case no_if: if (expr->isDataset()) return PassThroughActivity; if (expr->isDatarow()) return PassThroughActivity; return NonActivity; case no_addfiles: case no_merge: case no_nonempty: case no_cogroup: return PassThroughActivity; case no_keydiff: case no_keypatch: return NonActivity; case no_datasetfromrow: if (getNumActivityArguments(expr) == 0) return SourceActivity; return SimpleActivity; case no_newtransform: case no_transform: return NonActivity; return ComplexNonActivity; case no_record: case no_assign: case no_assignall: return NonActivity; case NO_AGGREGATE: case no_createset: return SinkActivity; case no_call: case no_externalcall: if (expr->isDataset() || expr->isDatarow()) return SourceActivity; //MORE: What about parameters?? return NonActivity; case no_commonspill: case no_readspill: return SimpleActivity; case no_writespill: return SinkActivity; case no_preservemeta: case no_dataset_alias: if (getProjectExprKind(expr->queryChild(0)) == CompoundableActivity) return CompoundableActivity; return PassThroughActivity; } ITypeInfo * type = expr->queryType(); if (!type) return NonActivity; type_t tc = type->getTypeCode(); switch (tc) { case type_void: if (getNumChildTables(expr) > 0) return SinkActivity; return NonActivity; case type_row: case type_table: case type_groupedtable: break; case type_transform: return NonActivity; default: return NonActivity; } if (getNumActivityArguments(expr) == 0) return SourceActivity; return SimpleActivity; } void ImplicitProjectTransformer::processSelect(ComplexImplicitProjectInfo * extra, IHqlExpression * curSelect, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect) { if (leftSelect) processMatchingSelector(extra->leftFieldsRequired, curSelect, leftSelect); if (ds) processMatchingSelector(extra->leftFieldsRequired, curSelect, ds); if (rightSelect) processMatchingSelector(extra->rightFieldsRequired, curSelect, rightSelect); switch (extra->activityKind()) { case DenormalizeActivity: //For DENORMALIZE the transform is always called, possibly multiple times. Therefore //if a field is used from the output it must be included in the input (but could be blanked) //if a field is used from LEFT then it must be in the input and the output processMatchingSelector(extra->outputFields, curSelect, leftSelect); break; case RollupTransformActivity: case IterateTransformActivity: //For ROLLUP/ITERATE the transform may or may not be called. Therefore //if a field is used from the output it is used from the input [ handled in the main processing loop] //if a field is used from LEFT then it must be in the input and the output //Anything used from the input must be in the output (but could be blanked) - handled elsewhere processMatchingSelector(extra->outputFields, curSelect, leftSelect); if (ds) processMatchingSelector(extra->outputFields, curSelect, ds); break; } } void ImplicitProjectTransformer::processSelects(ComplexImplicitProjectInfo * extra, SelectUsedArray const & selectsUsed, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect) { ForEachItemIn(i2, selectsUsed) { IHqlExpression * curSelect = &selectsUsed.item(i2); processSelect(extra, curSelect, ds, leftSelect, rightSelect); } } void ImplicitProjectTransformer::processTransform(ComplexImplicitProjectInfo * extra, IHqlExpression * transform, IHqlExpression * dsSelect, IHqlExpression * leftSelect, IHqlExpression * rightSelect) { HqlExprCopyArray assigns; unwindTransform(assigns, transform); ForEachItemIn(itr, assigns) { IHqlExpression * cur = &assigns.item(itr); //Need to handle skip attributes... if (cur->getOperator() == no_skip) { const SelectUsedArray & selectsUsed = querySelectsUsed(cur); processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect); } else if (cur->getOperator() == no_assign) { IHqlExpression * value = cur->queryChild(1); if (!value->isPure()) { IHqlExpression * lhs = cur->queryChild(0); processMatchingSelector(extra->outputFields, lhs, lhs->queryChild(0)); const SelectUsedArray & selectsUsed = querySelectsUsed(value); processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect); } } } } void ImplicitProjectTransformer::calculateFieldsUsed(IHqlExpression * expr) { ComplexImplicitProjectInfo * extra = queryBodyComplexExtra(expr); if (!extra->okToOptimize()) { if (expr->queryRecord()) extra->addAllOutputs(); } else { ForEachItemIn(i1, extra->outputs) extra->outputs.item(i1).notifyRequiredFields(extra); if (extra->outputFields.includeAll()) assertex(extra->queryOutputRecord() != NULL); } switch (extra->activityKind()) { case CreateRecordActivity: case CreateRecordLRActivity: { //output will now be whatever fields are required by the output fields. //input will be whatever is used in the appropriate transforms. IHqlExpression * transform = queryNewColumnProvider(expr); if (!isKnownTransform(transform)) { extra->leftFieldsRequired.setAll(); if (extra->activityKind() == CreateRecordLRActivity) extra->rightFieldsRequired.setAll(); break; } IHqlExpression * ds = expr->queryChild(0)->queryNormalizedSelector(); IHqlExpression * selSeq = querySelSeq(expr); OwnedHqlExpr dsSelect = LINK(ds); OwnedHqlExpr leftSelect; OwnedHqlExpr rightSelect; if (selSeq) leftSelect.setown(createSelector(no_left, ds, selSeq)); if (extra->activityKind() == CreateRecordLRActivity) rightSelect.setown(createSelector(no_right, expr->queryChild(1), selSeq)); if (expr->getOperator() == no_selfjoin) dsSelect.setown(createSelector(no_right, ds, selSeq)); //This is here to ensure that calls that have side-effects don't get removed because the fields are removed if (hasSideEffects(transform)) extra->addAllOutputs(); //MORE: querySelectsUsedForField() could be optimized by creating a map first, but it is only ~1% of time, so not really worth it. SelectUsedArray parentSelects; HqlExprArray values; extra->outputFields.gatherTransformValuesUsed(NULL, &parentSelects, &values, NULL, transform); processSelects(extra, parentSelects, dsSelect, leftSelect, rightSelect); ForEachItemIn(i, values) processSelects(extra, querySelectsUsed(&values.item(i)), dsSelect, leftSelect, rightSelect); if (!extra->outputFields.allGathered()) assertex(extra->outputFields.allGathered()); processTransform(extra, transform, dsSelect, leftSelect, rightSelect); unsigned max = expr->numChildren(); unsigned first = extra->inputs.ordinality(); if (expr->getOperator() == no_fetch) first = 2; for (unsigned i2=first; i2 < max; i2++) { IHqlExpression * cur = expr->queryChild(i2); if (cur == transform) continue; const SelectUsedArray & selectsUsed = querySelectsUsed(cur); processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect); } switch (expr->getOperator()) { case no_newusertable: case no_hqlproject: if (extra->okToOptimize()) extra->inputs.item(0).stopOptimizeCompound(false); break; case no_newaggregate: { IHqlExpression * grouping = queryRealChild(expr, 3); if (grouping) { //Need to make sure that grouping criteria fields are also in the output ForEachChild(i, grouping) { IHqlExpression * curGrouping = grouping->queryChild(i); IHqlExpression * match = NULL; //All groupings have entries in the transform - find the corresponding field. ForEachChild(j, transform) { IHqlExpression * cur = transform->queryChild(j); IHqlExpression * rhs = cur->queryChild(1); if (rhs->getOperator() == no_activerow) rhs = rhs->queryChild(0); if (rhs == curGrouping) { match = cur->queryChild(0); break; } } assertex(match); processMatchingSelector(extra->outputFields, match, match->queryChild(0)); } } break; } } break; } case CompoundActivity: { //output will now be whatever fields are required by the output fields. //input will be the same as the output fields, since it is just a wrapper node. extra->finalizeOutputRecord(); //MORE: Not sure this is neededextra->leftFieldsRequired.clone(extra->outputFields); extra->insertProject = true; assertex(extra->inputs.ordinality() == 0); //extra->inputs.item(0).stopOptimizeCompound(true); break; } case CompoundableActivity: { //Prevent preserve meta from stripping the disk read down to a single field. if (extra->inputs.ordinality()) extra->inputs.item(0).stopOptimizeCompound(true); if (extra->okToOptimize()) extra->finalizeOutputRecord(); break; } case CreateRecordSourceActivity: case AnyTypeActivity: { if (extra->okToOptimize()) extra->finalizeOutputRecord(); break; } case PassThroughActivity: if (extra->okToOptimize()) { node_operator op = expr->getOperator(); if ((op == no_if) && expr->hasProperty(_resourced_Atom)) { extra->preventOptimization(); extra->addAllOutputs(); } else if (op == no_merge) { //Ensure all the fields used by the sort order are preserved in the input streams IHqlExpression * order = expr->queryProperty(sortedAtom); assertex(order); ForEachChild(i, order) { IHqlExpression * cur = order->queryChild(i); if (!cur->isAttribute() && !cur->isConstant()) // shouldn't really happen.. { if ((cur->getOperator() == no_select) && !isNewSelector(cur)) { IHqlExpression * ds = queryDatasetCursor(cur); IHqlExpression * field = cur->queryChild(1); if (ds == queryActiveTableSelector()) processMatchingSelector(extra->outputFields, cur, queryActiveTableSelector()); else extra->addAllOutputs(); } else extra->addAllOutputs(); } } } } //No need to do anything - inputs are taken directly from required outputs break; case ScalarSelectActivity: { IHqlExpression * root = queryRootSelector(expr); extra->leftFieldsRequired.appendField(*LINK(root->queryChild(1))); break; } case RollupTransformActivity: case IterateTransformActivity: { //currently rollup and iterate //output record is fixed by input, and never gets changed. //input is all fields used required in output (since can't change record format) plus any others used inside the transform IHqlExpression * transform = queryNewColumnProvider(expr); if (hasSideEffects(transform)) extra->addAllOutputs(); if (extra->outputFields.includeAll()) extra->leftFieldsRequired.setAll(); else { IHqlExpression * dsSelect = expr->queryChild(0)->queryNormalizedSelector(); IHqlExpression * selSeq = querySelSeq(expr); OwnedHqlExpr leftSelect = createSelector(no_left, dsSelect, selSeq); OwnedHqlExpr rightSelect = createSelector(no_right, dsSelect, selSeq); //Need to handle skip attributes... processTransform(extra, transform, dsSelect, leftSelect, rightSelect); //Rollup criteria need to be included in the fields used! unsigned max = expr->numChildren(); for (unsigned i2=1; i2 < max; i2++) { if (extra->leftFieldsRequired.includeAll()) break; IHqlExpression * cur = expr->queryChild(i2); if (cur != transform) { const SelectUsedArray & selectsUsed = querySelectsUsed(cur); processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect); } } //NB: outputfields can extend... while (!extra->outputFields.allGathered()) { SelectUsedArray parentSelects; HqlExprArray values; HqlExprArray selfSelects; extra->outputFields.gatherTransformValuesUsed(&selfSelects, &parentSelects, &values, dsSelect, transform); \ //For ROLLUP/ITERATE the transform may or may not be called. Therefore //if a field is used from the output it is used from the input //if a field is used from LEFT then it must be in the input and the output //if a field is used from RIGHT it muse be in the output (but could be blanked) - handled elsewhere //Ensure all output rows are also included in the input dataset ForEachItemIn(i1, selfSelects) processMatchingSelector(extra->leftFieldsRequired, &selfSelects.item(i1), dsSelect); processSelects(extra, parentSelects, NULL, leftSelect, rightSelect); ForEachItemIn(i2, values) processSelects(extra, querySelectsUsed(&values.item(i2)), NULL, leftSelect, rightSelect); //If all fields selected from the output then select all fields from the input if (extra->outputFields.checkAllFieldsUsed()) extra->leftFieldsRequired.setAll(); //if selected all fields from the input then already done. if (extra->leftFieldsRequired.includeAll()) { extra->outputFields.setAll(); break; } } if (extra->leftFieldsRequired.includeAll()) extra->addAllOutputs(); } break; } case DenormalizeActivity: { //output record is fixed by input //input is all fields used required in output (since can't change record format) plus any others used inside the transform IHqlExpression * transform = queryNewColumnProvider(expr); if (hasSideEffects(transform)) extra->addAllOutputs(); if (extra->outputFields.includeAll()) extra->leftFieldsRequired.setAll(); IHqlExpression * left = expr->queryChild(0)->queryNormalizedSelector(); IHqlExpression * right = expr->queryChild(1)->queryNormalizedSelector(); IHqlExpression * selSeq = querySelSeq(expr); OwnedHqlExpr leftSelect = createSelector(no_left, left, selSeq); OwnedHqlExpr rightSelect = createSelector(no_right, right, selSeq); processTransform(extra, transform, NULL, leftSelect, rightSelect); //include all other attributes except for the transform unsigned max = expr->numChildren(); for (unsigned i2=2; i2 < max; i2++) { IHqlExpression * cur = expr->queryChild(i2); if (cur != transform) { const SelectUsedArray & selectsUsed = querySelectsUsed(cur); processSelects(extra, selectsUsed, NULL, leftSelect, rightSelect); } } while (!extra->outputFields.allGathered()) { SelectUsedArray parentSelects; HqlExprArray values; HqlExprArray selfSelects; extra->outputFields.gatherTransformValuesUsed(&selfSelects, &parentSelects, &values, left, transform); //For DENORMALIZE the transform is always called, possibly multiple times. Therefore //if a field is used from the output it must be included in the input (but could be blanked) //if a field is used from LEFT then it must be in the input and the output //Ensure all output rows are also included in the input dataset ForEachItemIn(i1, selfSelects) processMatchingSelector(extra->leftFieldsRequired, &selfSelects.item(i1), left); // more: Could blank processSelects(extra, parentSelects, NULL, leftSelect, rightSelect); ForEachItemIn(i2, values) processSelects(extra, querySelectsUsed(&values.item(i2)), NULL, leftSelect, rightSelect); } break; } case FixedInputActivity: { extra->leftFieldsRequired.setAll(); extra->rightFieldsRequired.setAllIfAny(); if (expr->queryRecord()) extra->addAllOutputs(); break; } case SourceActivity: { //No inputs to worry about, and not compoundable so output record won't change. extra->addAllOutputs(); break; } case SinkActivity: case SimpleActivity: { //inputs will be outputs required plus any fields used within the function //outputs will eventually match inputs when finished percolating. if (extra->outputFields.includeAll()) extra->leftFieldsRequired.setAll(); else { if (extra->activityKind() != SinkActivity) extra->leftFieldsRequired.clone(extra->outputFields); IHqlExpression * ds = expr->queryChild(0)->queryNormalizedSelector(); IHqlExpression * selSeq = querySelSeq(expr); //Left and right are here because of the dedup criteria. It would be better to //special case that, but first lets get it working OwnedHqlExpr leftSelect = selSeq ? createSelector(no_left, ds, selSeq) : NULL; OwnedHqlExpr rightSelect = selSeq ? createSelector(no_right, ds, selSeq) : NULL; unsigned max = expr->numChildren(); for (unsigned i2=1; i2 < max; i2++) { const SelectUsedArray & selectsUsed = querySelectsUsed(expr->queryChild(i2)); ForEachItemIn(i3, selectsUsed) { IHqlExpression * curSelect = &selectsUsed.item(i3); processMatchingSelector(extra->leftFieldsRequired, curSelect, leftSelect); processMatchingSelector(extra->leftFieldsRequired, curSelect, rightSelect); processMatchingSelector(extra->leftFieldsRequired, curSelect, ds); if (extra->leftFieldsRequired.includeAll()) break; } if (extra->leftFieldsRequired.includeAll()) break; } if (extra->activityKind() != SinkActivity) { if (extra->leftFieldsRequired.includeAll()) extra->addAllOutputs(); } } break; } default: throwUnexpected(); } } void ImplicitProjectTransformer::logChange(const char * message, IHqlExpression * expr, const UsedFieldSet & fields) { _ATOM exprName = expr->queryName(); if (!exprName && isCompoundSource(expr)) exprName = expr->queryChild(0)->queryName(); StringBuffer name, fieldText; if (exprName) name.append(exprName).append(" "); name.append(getOpString(expr->getOperator())); const UsedFieldSet * original = fields.queryOriginal(); assertex(original); fieldText.append("(").append(fields.numFields()); fieldText.append("/").append(original->numFields()); fieldText.append(")"); //If number removed < number remaining just log the fields removed. if (fields.numFields() * 2 > original->numFields()) { UsedFieldSet removed; removed.createDifference(*original, fields); fieldText.append(" removed "); removed.getText(fieldText); } else fields.getText(fieldText); const char * const format = "ImplicitProject: %s %s now %s"; DBGLOG(format, message, name.str(), fieldText.str()); if (options.notifyOptimizedProjects) { if (options.notifyOptimizedProjects >= 2 || exprName) { StringBuffer messageText; messageText.appendf(format, message, name.str(), fieldText.str()); translator.addWorkunitException(ExceptionSeverityInformation, 0, messageText.str(), NULL); } } } void ImplicitProjectTransformer::getTransformedChildren(IHqlExpression * expr, HqlExprArray & children) { transformChildren(expr, children); } IHqlExpression * ImplicitProjectTransformer::createParentTransformed(IHqlExpression * expr) { OwnedHqlExpr transformed = Parent::createTransformed(expr); updateOrphanedSelectors(transformed, expr); return transformed.getClear(); } IHqlExpression * ImplicitProjectTransformer::createTransformed(IHqlExpression * expr) { if (expr->isConstant()) { switch (expr->getOperator()) { case no_transform: case no_newtransform: case no_transformlist: case no_list: return LINK(expr); } } //Can't call Parent::createTransformed as a default because the TranformRecordActivities trigger asserts when types mismatch. ImplicitProjectInfo * extra = queryBodyExtra(expr); ComplexImplicitProjectInfo * complexExtra = extra->queryComplexInfo(); if (!complexExtra) return createParentTransformed(expr); OwnedHqlExpr transformed; switch (extra->activityKind()) { case DenormalizeActivity: case RollupTransformActivity: case IterateTransformActivity: { //Always reduce things that create a new record so they only project the fields they need to if (complexExtra->outputChanged() || !complexExtra->fieldsToBlank.isEmpty()) { unsigned transformPos = queryTransformIndex(expr); //Walk transform, only including assigns that are in the output list. HqlExprArray args; getTransformedChildren(expr, args); //MORE: If the input's output contains fields that are not required in this transforms output then //include them, but assign them default values to stop them pulling in other variables. IHqlExpression * transform = &args.item(transformPos); IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(transform, &complexExtra->fieldsToBlank); args.replace(*newTransform, transformPos); transformed.setown(expr->clone(args)); transformed.setown(updateSelectors(transformed, expr)); logChange("Transform", expr, complexExtra->outputFields); } else { #ifdef _DEBUG IHqlExpression * ds = expr->queryChild(0); OwnedHqlExpr transformedDs = transform(ds); assertex(recordTypesMatch(ds, transformedDs)); #endif transformed.setown(createParentTransformed(expr)); //MORE: Need to replace left/right with their transformed varieties because the record may have changed format transformed.setown(updateSelectors(transformed, expr)); } break; } case CreateRecordActivity: case CreateRecordLRActivity: { //Always reduce things that create a new record so they only project the fields they need to if (complexExtra->outputChanged()) { unsigned transformPos = queryTransformIndex(expr); //Walk transform, only including assigns that are in the output list. HqlExprArray args; getTransformedChildren(expr, args); IHqlExpression * transform = &args.item(transformPos); IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(transform, NULL); args.replace(*newTransform, transformPos); if (transform->getOperator() == no_newtransform) args.replace(*LINK(complexExtra->queryOutputRecord()), transformPos-1); IHqlExpression * onFail = queryProperty(onFailAtom, args); if (onFail) { IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(onFail->queryChild(0), NULL); IHqlExpression * newOnFail = createExprAttribute(onFailAtom, newTransform); args.replace(*newOnFail, args.find(*onFail)); } //We may have converted a count project into a project..... (see bug18839.xhql) if (expr->getOperator() == no_hqlproject) { IHqlExpression * countProjectAttr = queryProperty(_countProject_Atom, args); if (countProjectAttr && !transformContainsCounter(newTransform, countProjectAttr->queryChild(0))) args.zap(*countProjectAttr); } transformed.setown(expr->clone(args)); transformed.setown(updateSelectors(transformed, expr)); logChange("Minimize", expr, complexExtra->outputFields); } else { transformed.setown(createParentTransformed(expr)); //MORE: Need to replace left/right with their transformed varieties because the record may have changed format transformed.setown(updateSelectors(transformed, expr)); } break; } case CreateRecordSourceActivity: { assertex(expr->getOperator() == no_inlinetable || expr->getOperator() == no_dataset_from_transform); //Always reduce things that create a new record so they only project the fields they need to if (complexExtra->outputChanged()) { HqlExprArray args; switch (expr->getOperator()) { case no_inlinetable: { IHqlExpression * transforms = expr->queryChild(0); HqlExprArray newTransforms; ForEachChild(i, transforms) { IHqlExpression * transform = transforms->queryChild(i); newTransforms.append(*complexExtra->outputFields.createFilteredTransform(transform, NULL)); } args.append(*transforms->clone(newTransforms)); break; } case no_dataset_from_transform: { IHqlExpression * transform = expr->queryChild(1); args.append(*LINK(expr->queryChild(0))); args.append(*complexExtra->outputFields.createFilteredTransform(transform, NULL)); break; } } args.append(*LINK(complexExtra->queryOutputRecord())); unwindChildren(args, expr, 2); transformed.setown(expr->clone(args)); logChange("Minimize", expr, complexExtra->outputFields); } else { transformed.setown(createParentTransformed(expr)); //MORE: Need to replace left/right with their transformed varieties because the record may have changed format transformed.setown(updateSelectors(transformed, expr)); } break; } case CompoundActivity: { transformed.setown(createParentTransformed(expr)); if (complexExtra->outputChanged()) { HqlExprArray args; args.append(*complexExtra->createOutputProject(transformed->queryChild(0))); transformed.setown(transformed->clone(args)); logChange("Project output from compound", expr, complexExtra->outputFields); break; } } case CompoundableActivity: { transformed.setown(createParentTransformed(expr)); //insert a project after the record. if (complexExtra->outputChanged()) { transformed.setown(complexExtra->createOutputProject(transformed)); transformed.setown(createWrapper(queryCompoundOp(expr), transformed.getClear())); logChange("Project output from", expr, complexExtra->outputFields); } break; } case AnyTypeActivity: { transformed.setown(createParentTransformed(expr)); //insert a project after the record. if (complexExtra->outputChanged()) { logChange("Change format of dataset", expr, complexExtra->outputFields); HqlExprArray args; args.append(*LINK(complexExtra->queryOutputRecord())); unwindChildren(args, transformed, 1); transformed.setown(transformed->clone(args)); } break; } case FixedInputActivity: case SourceActivity: case NonActivity: case ScalarSelectActivity: case SinkActivity: transformed.setown(createParentTransformed(expr)); //can't change... break; case PassThroughActivity: if (complexExtra->outputChanged()) { HqlExprArray args; ForEachChild(i, expr) { IHqlExpression * cur = expr->queryChild(i); OwnedHqlExpr next = transform(cur); if (cur->isDataset() || cur->isDatarow()) { //Ensure all inputs have same format.. if (cur->queryRecord() != complexExtra->queryOutputRecord()) next.setown(complexExtra->createOutputProject(next)); } args.append(*next.getClear()); } transformed.setown(expr->clone(args)); logChange("Passthrough modified", expr, complexExtra->outputFields); } else transformed.setown(createParentTransformed(expr)); break; case SimpleActivity: { transformed.setown(createParentTransformed(expr)); IHqlExpression * onFail = transformed->queryProperty(onFailAtom); if (onFail) { IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(onFail->queryChild(0), NULL); IHqlExpression * newOnFail = createExprAttribute(onFailAtom, newTransform); transformed.setown(replaceOwnedProperty(transformed, newOnFail)); } if (complexExtra->insertProject) { HqlExprArray args; OwnedHqlExpr inputProject = complexExtra->createOutputProject(transformed->queryChild(0)); OwnedHqlExpr replacement = replaceChildDataset(transformed, inputProject, 0); transformed.setown(updateSelectors(replacement, expr)); logChange("Insert project before", expr, complexExtra->outputFields); } else transformed.setown(updateSelectors(transformed, expr)); break; } default: throwUnexpected(); } return transformed.getClear(); } ANewTransformInfo * ImplicitProjectTransformer::createTransformInfo(IHqlExpression * expr) { ProjectExprKind kind = getProjectExprKind(expr); node_operator op = expr->getOperator(); if (kind == NonActivity) { switch (op) { case no_record: case no_rowset: case no_rowsetrange: case no_datasetlist: break; default: return CREATE_NEWTRANSFORMINFO2(ImplicitProjectInfo, expr, kind); } } if (kind == ComplexNonActivity) kind = NonActivity; return CREATE_NEWTRANSFORMINFO2(ComplexImplicitProjectInfo, expr, kind); } void ImplicitProjectTransformer::finalizeFields() { ForEachItemIn(i, activities) finalizeFields(&activities.item(i)); } static bool requiresFewerFields(const UsedFieldSet & fields, ComplexImplicitProjectInfo & input) { return fields.requiresFewerFields(input.outputFields); } void ImplicitProjectTransformer::finalizeFields(IHqlExpression * expr) { ComplexImplicitProjectInfo * extra = queryBodyComplexExtra(expr); if (!extra->okToOptimize()) return; switch (extra->activityKind()) { case CreateRecordActivity: case CreateRecordLRActivity: case CompoundActivity: case CompoundableActivity: case CreateRecordSourceActivity: case AnyTypeActivity: extra->finalizeOutputRecord(); break; case DenormalizeActivity: case RollupTransformActivity: case IterateTransformActivity: { //output must always match the input..., but any fields that are in the input, but not needed in the output we'll add as exceptions //and assign default values to them, otherwise it can cause other fields to be required in the input + causes chaos extra->fieldsToBlank.createDifference(extra->inputs.item(0).outputFields, extra->outputFields); extra->outputFields.unionFields(extra->fieldsToBlank); extra->fieldsToBlank.optimizeFieldsToBlank(extra->outputFields, queryNewColumnProvider(expr)); if (!extra->fieldsToBlank.isEmpty()) { const char * opString = getOpString(expr->getOperator()); StringBuffer fieldText; extra->fieldsToBlank.getText(fieldText); DBGLOG("ImplicitProject: Fields %s for %s not required by outputs - so blank in transform", fieldText.str(), opString); } extra->finalizeOutputRecord(); break; } case FixedInputActivity: case SourceActivity: case ScalarSelectActivity: break; case PassThroughActivity: { //Banches coming into this IF/MERGE etc. may have different fields (e.g., because of ITERATEs), and //the output fields may be smaller (e.g., no merge sort conditions, no fields used and inputs filter) //So use the intersection of the inputfields as the output record. 90% of the time they will be //the same so no projects will be introduced. bool anyProjected = false; unsigned numInputs = extra->inputs.ordinality(); for (unsigned i=0; i != numInputs; i++) { ComplexImplicitProjectInfo & cur = extra->inputs.item(i); if (!cur.outputFields.includeAll()) { extra->outputFields.set(cur.outputFields); for (unsigned i2=i+1; i2 != numInputs; i2++) { ComplexImplicitProjectInfo & cur = extra->inputs.item(i2); extra->outputFields.intersectFields(cur.outputFields); } extra->finalizeOutputRecord(); anyProjected = true; break; } } if (!anyProjected) extra->setMatchingOutput(&extra->inputs.item(0)); break; } case SinkActivity: break; case SimpleActivity: if (extra->insertProject && requiresFewerFields(extra->leftFieldsRequired, extra->inputs.item(0))) { extra->outputFields.set(extra->leftFieldsRequired); extra->finalizeOutputRecord(); } else extra->setMatchingOutput(&extra->inputs.item(0)); break; default: throwUnexpected(); } } void ImplicitProjectTransformer::inheritActiveFields(IHqlExpression * expr, ImplicitProjectInfo * extra, unsigned min, unsigned max) { for (unsigned i = min; i < max; i++) inheritActiveFields(extra, expr->queryChild(i)); } void ImplicitProjectTransformer::inheritActiveFields(ImplicitProjectInfo * target, IHqlExpression * source) { if (source->queryBody()->queryTransformExtra()) { target->addActiveSelects(querySelectsUsed(source)); } } void ImplicitProjectTransformer::insertProjects() { ForEachItemIn(i, activities) insertProjects(&activities.item(i)); } void ImplicitProjectTransformer::insertProjects(IHqlExpression * expr) { ComplexImplicitProjectInfo * extra = queryBodyComplexExtra(expr); if (!extra->okToOptimize()) return; if (options.optimizeSpills && (expr->getOperator() == no_commonspill)) { if (requiresFewerFields(extra->leftFieldsRequired, extra->inputs.item(0))) extra->insertProject = true; return; } if (options.insertProjectCostLevel == 0) return; if (extra->queryCostFactor(targetClusterType) < options.insertProjectCostLevel) return; switch (extra->activityKind()) { case SimpleActivity: if (requiresFewerFields(extra->leftFieldsRequired, extra->inputs.item(0))) extra->insertProject = true; break; } } void ImplicitProjectTransformer::percolateFields() { ForEachItemInRev(i, activities) calculateFieldsUsed(&activities.item(i)); } IHqlExpression * ImplicitProjectTransformer::process(IHqlExpression * expr) { cycle_t time1 = msTick(); analyse(expr, 0); // gather a list of activities, and link them together. cycle_t time2 = msTick(); //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.analyse", time2-time1); percolateFields(); cycle_t time3 = msTick(); //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.percolate", time3-time2); switch (targetClusterType) { case RoxieCluster: //worth inserting projects after sources that can be compound. //also may be worth projecting before iterating since an iterate //copies data but can't change the fields in use break; case HThorCluster: // same as roxie, but also maybe worth inserting projects to minimise the amount of data that is spilled. break; case ThorCluster: case ThorLCRCluster: //worth inserting projects to reduce copying, spilling, but primarily data transfered between nodes. if (options.insertProjectCostLevel || options.optimizeSpills) insertProjects(); break; } cycle_t time4 = msTick(); //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.reduceData", time4-time3); finalizeFields(); cycle_t time5 = msTick(); //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.finalize", time5-time4); //traceActivities(); OwnedHqlExpr ret = transformRoot(expr); cycle_t time6 = msTick(); //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.transform", time6-time5); return ret.getClear(); } void ImplicitProjectTransformer::traceActivities() { ForEachItemIn(i, activities) queryBodyComplexExtra(&activities.item(i))->trace(); } IHqlExpression * ImplicitProjectTransformer::updateSelectors(IHqlExpression * newExpr, IHqlExpression * oldExpr) { //MORE: Clean me up using new flags when they are merged IHqlExpression * newDs = newExpr->queryChild(0); IHqlExpression * oldDs = oldExpr->queryChild(0); switch (getChildDatasetType(newExpr)) { case childdataset_none: case childdataset_many_noscope: case childdataset_many: case childdataset_if: case childdataset_case: case childdataset_map: case childdataset_dataset_noscope: return LINK(newExpr); //None of these have any scoped arguments, so no need to remove them break; case childdataset_dataset: { return updateMappedFields(newExpr, oldDs->queryNormalizedSelector(), newDs->queryNormalizedSelector(), 1); } case childdataset_datasetleft: { OwnedHqlExpr mapped = updateMappedFields(newExpr, oldDs->queryNormalizedSelector(), newDs->queryNormalizedSelector(), 1); IHqlExpression * selSeq = querySelSeq(newExpr); assertex(selSeq == querySelSeq(oldExpr)); OwnedHqlExpr newLeft = createSelector(no_left, newDs, selSeq); OwnedHqlExpr oldLeft = createSelector(no_left, oldDs, selSeq); return updateChildSelectors(mapped, oldLeft, newLeft, 1); } case childdataset_left: { IHqlExpression * selSeq = querySelSeq(newExpr); assertex(selSeq == querySelSeq(oldExpr)); OwnedHqlExpr newLeft = createSelector(no_left, newDs, selSeq); OwnedHqlExpr oldLeft = createSelector(no_left, oldDs, selSeq); return updateChildSelectors(newExpr, oldLeft, newLeft, 1); } case childdataset_same_left_right: case childdataset_top_left_right: case childdataset_nway_left_right: { OwnedHqlExpr mapped = updateMappedFields(newExpr, oldDs->queryNormalizedSelector(), newDs->queryNormalizedSelector(), 1); IHqlExpression * selSeq = querySelSeq(newExpr); assertex(selSeq == querySelSeq(oldExpr)); OwnedHqlExpr newLeft = createSelector(no_left, newExpr->queryChild(0), selSeq); OwnedHqlExpr oldLeft = createSelector(no_left, oldExpr->queryChild(0), selSeq); OwnedHqlExpr ds1 = updateChildSelectors(mapped, oldLeft, newLeft, 1); OwnedHqlExpr newRight = createSelector(no_right, newExpr->queryChild(0), selSeq); OwnedHqlExpr oldRight = createSelector(no_right, oldExpr->queryChild(0), selSeq); return updateChildSelectors(ds1, oldRight, newRight, 1); } case childdataset_leftright: { IHqlExpression * selSeq = querySelSeq(newExpr); assertex(selSeq == querySelSeq(oldExpr)); OwnedHqlExpr newLeft = createSelector(no_left, newExpr->queryChild(0), selSeq); OwnedHqlExpr oldLeft = createSelector(no_left, oldExpr->queryChild(0), selSeq); unsigned firstLeft = (newExpr->getOperator() == no_normalize) ? 1 : 2; OwnedHqlExpr ds1 = updateChildSelectors(newExpr, oldLeft, newLeft, firstLeft); OwnedHqlExpr newRight = createSelector(no_right, newExpr->queryChild(1), selSeq); OwnedHqlExpr oldRight = createSelector(no_right, oldExpr->queryChild(1), selSeq); return updateChildSelectors(ds1, oldRight, newRight, 2); } break; default: throwUnexpected(); } } const SelectUsedArray & ImplicitProjectTransformer::querySelectsUsedForField(IHqlExpression * transform, IHqlExpression * field) { IHqlExpression * transformValues = queryTransformAssignValue(transform, field); if (!transformValues) transformValues = queryTransformAssignValue(transform, field); return querySelectsUsed(transformValues); } #include "hqlttcpp.ipp" IHqlExpression * insertImplicitProjects(HqlCppTranslator & translator, IHqlExpression * expr, bool optimizeSpills) { #if defined(POST_COMMON_ANNOTATION) HqlExprArray ret; { ImplicitProjectTransformer transformer(translator, optimizeSpills); ret.append(*transformer.process(expr)); } normalizeAnnotations(translator, ret); return createActionList(ret); #else ImplicitProjectTransformer transformer(translator, optimizeSpills); return transformer.process(expr); #endif } void insertImplicitProjects(HqlCppTranslator & translator, HqlExprArray & exprs) { if (exprs.ordinality()) { OwnedHqlExpr compound = createActionList(exprs); OwnedHqlExpr ret = insertImplicitProjects(translator, compound, false); exprs.kill(); ret->unwindList(exprs, no_actionlist); } } /* To Implement field gathering would need to do the following: - Could assert that only non-nested fields are considered. That means all field references are in the form (ds.field). This would simplify gathering, filtering, and translating the dataset selector.... - All no_selects on in-scope datasets (or sub fields?) get added to list of active fields in self. - analyseExpr() clones all active fields from children into self. - active datasets used in a dataset context need to also be added. - any item inherits all child fields from non-dataset children, and removes all references to parent datasets (in what ever form) very similar to the inScope Table processing. [Only really needed for non-global activities] - any activity inherits all inScope fields from its parent. - those expressions with scoped dataset inputs need two lists (childrensFieldAccess and inscopeFields) - may be worth having different classes for handling the different categories: (simpleExpression, scopedExpression, activity) with virtuals to handle differences - removingChildReferences would need matchesSelector(list, selector) which worked recursively. */