Browse Source

HPCC-20252 refactor index read generation to allow remote projection

Generates exactly the same code as before with option disabled

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 7 years ago
parent
commit
61c8359a9a

+ 17 - 2
common/deftype/deftype.cpp

@@ -1201,6 +1201,13 @@ void CBasedTypeInfo::serializeSkipChild(MemoryBuffer &tgt)
 
 //===========================================================================
 
+IValue * CKeyedIntTypeInfo::castFrom(bool /*isSignedValue*/, __int64 value)
+{
+    return createIntValue(value, LINK(this));
+}
+
+//===========================================================================
+
 bool CTransformTypeInfo::assignableFrom(ITypeInfo *t2)
 {
     if (getTypeCode()==t2->getTypeCode())
@@ -1835,10 +1842,10 @@ extern DEFTYPE_API ITypeInfo *makeFilePosType(ITypeInfo *basetype)
     return commonUpType(new CFilePosTypeInfo(basetype));
 }
 
-extern DEFTYPE_API ITypeInfo *makeKeyedType(ITypeInfo *basetype)
+extern DEFTYPE_API ITypeInfo *makeKeyedIntType(ITypeInfo *basetype)
 {
     assertex(basetype);
-    return commonUpType(new CKeyedTypeInfo(basetype));
+    return commonUpType(new CKeyedIntTypeInfo(basetype));
 }
 
 extern DEFTYPE_API ITypeInfo *makeDecimalType(unsigned digits, unsigned prec, bool isSigned)
@@ -2616,6 +2623,8 @@ static bool preservesValue(ITypeInfo * after, ITypeInfo * before, bool preserveI
     {
     case type_boolean: 
         return true;
+    case type_keyedint:
+        return preservesValue(after, before->queryChildType(), preserveInformation);
     case type_packedint:
         before = before->queryPromotedType();
         //fall through
@@ -2624,6 +2633,8 @@ static bool preservesValue(ITypeInfo * after, ITypeInfo * before, bool preserveI
     case type_enumerated:
         switch (afterType)
         {
+        case type_keyedint:
+            return preservesValue(after->queryChildType(), before, preserveInformation);
         case type_packedint:
             after = after->queryPromotedType();
             //fall through.
@@ -2737,6 +2748,10 @@ bool preservesOrder(ITypeInfo * after, ITypeInfo * before)
 {
     type_t beforeType = before->getTypeCode();
     type_t afterType = after->getTypeCode();
+    if (beforeType == type_keyedint)
+        return preservesOrder(after, before->queryChildType());
+    if (afterType == type_keyedint)
+        return preservesOrder(after->queryChildType(), before);
 
     switch (beforeType)
     {

+ 1 - 1
common/deftype/deftype.hpp

@@ -171,7 +171,7 @@ extern DEFTYPE_API ITypeInfo *makeSwapIntType(int size, bool isSigned);
 extern DEFTYPE_API ITypeInfo *makePackedIntType(ITypeInfo * basetype);
 extern DEFTYPE_API ITypeInfo *makePackedIntType(int size, bool isSigned);
 extern DEFTYPE_API ITypeInfo *makeFilePosType(ITypeInfo *basetype);
-extern DEFTYPE_API ITypeInfo *makeKeyedType(ITypeInfo *basetype);
+extern DEFTYPE_API ITypeInfo *makeKeyedIntType(ITypeInfo *basetype);
 extern DEFTYPE_API ITypeInfo *makeRealType(int size);
 extern DEFTYPE_API ITypeInfo *makeDataType(int size);
 extern DEFTYPE_API ITypeInfo *makeBitfieldType(int sizeInBits, ITypeInfo * basetype = NULL);

+ 6 - 3
common/deftype/deftype.ipp

@@ -599,13 +599,14 @@ public:
     virtual unsigned getCrc()                   { return basetype->getCrc(); }
 };
 
-class CKeyedTypeInfo : public CBasedTypeInfo
+class CKeyedIntTypeInfo : public CBasedTypeInfo
 {
 public:
-    CKeyedTypeInfo(ITypeInfo * _basetype) : CBasedTypeInfo(_basetype, _basetype->getSize()) {}
+    CKeyedIntTypeInfo(ITypeInfo * _basetype) : CBasedTypeInfo(_basetype, _basetype->getSize()) { promoted.setown(makeSwapIntType(length, isSigned())); }
     virtual type_t getTypeCode() const { return type_keyedint; };
 
     // Only used for generation of type information so no need to fully implement these
+    virtual IValue * castFrom(bool isSignedValue, __int64 value);
     virtual bool isSwappedEndian()              { return true; }
     virtual bool isInteger()                    { return true; };
     virtual bool isScalar()                     { return true; }
@@ -613,9 +614,11 @@ public:
     virtual unsigned getStringLen()             { return basetype->getStringLen(); }
     virtual unsigned getDigits()                { return basetype->getDigits(); }
     virtual const char *queryTypeName()         { return "keyed"; }
-    virtual ITypeInfo * queryPromotedType()     { return basetype->queryPromotedType(); }
+    virtual ITypeInfo * queryPromotedType()     { return promoted; }
     virtual StringBuffer &getECLType(StringBuffer & out) { return out.append("keyed"); }
     virtual unsigned getCrc()                   { return basetype->getCrc(); }
+protected:
+    Owned<ITypeInfo> promoted;
 };
 
 class CPackedIntTypeInfo : public CBasedTypeInfo

+ 2 - 0
ecl/hql/hqlatoms.cpp

@@ -319,6 +319,7 @@ IAtom * onceAtom;
 IAtom * onFailAtom;
 IAtom * onWarningAtom;
 IAtom * optAtom;
+IAtom * __option__Atom;
 IAtom * orderedAtom;
 IAtom * _ordered_Atom;
 IAtom * _origin_Atom;
@@ -784,6 +785,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(onFail);
     MAKEATOM(onWarning);
     MAKEATOM(opt);
+    MAKEATOM(__option__);
     MAKEATOM(ordered);
     MAKESYSATOM(ordered);
     MAKESYSATOM(origin);

+ 1 - 0
ecl/hql/hqlatoms.hpp

@@ -323,6 +323,7 @@ extern HQL_API IAtom * onceAtom;
 extern HQL_API IAtom * onFailAtom;
 extern HQL_API IAtom * onWarningAtom;
 extern HQL_API IAtom * optAtom;
+extern HQL_API IAtom * __option__Atom;
 extern HQL_API IAtom * orderedAtom;
 extern HQL_API IAtom * _ordered_Atom;
 extern HQL_API IAtom * _origin_Atom;

+ 6 - 7
ecl/hql/hqlgram.y

@@ -343,6 +343,7 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   ONLY
   ONWARNING
   OPT
+  __OPTION__
   OR
   ORDERED
   OUTER
@@ -10308,14 +10309,12 @@ dsOption
                         {   $$.setExpr(createExprAttribute(maxCountAtom, $3.getExpr()), $1); }
     | AVE '(' constExpression ')'
                         {   $$.setExpr(createExprAttribute(aveAtom, $3.getExpr()), $1); }
-    | VIRTUAL '(' UNKNOWN_ID ')'
+    | __OPTION__ '(' hintList ')'
                         {
-                            IIdAtom * id = $3.getId();
-                            if (id->queryLower() != legacyAtom)
-                                parser->reportError(ERR_EXPECTED, $3, "Expected LEGACY");
-                            else
-                                parser->reportWarning(CategoryDeprecated, ERR_DEPRECATED, $1.pos, "VIRTUAL(LEGACY) will be unsupported at a later date");
-                            $$.setExpr(createExprAttribute(virtualAtom, createAttribute(id->queryLower())), $1);
+                            HqlExprArray args;
+                            $3.unwindCommaList(args);
+                            $$.setExpr(createExprAttribute(__option__Atom, args));
+                            $$.setPosition($1);
                         }
     | commonAttribute
     ;

+ 1 - 1
ecl/hql/hqlir.cpp

@@ -1871,7 +1871,6 @@ id_t ExpressionIRPlayer::doProcessType(ITypeInfo * type)
         case type_sortlist:
         case type_any:
             break;
-        case type_keyedint:
         case type_int:
         case type_swapint:
         case type_real:
@@ -1915,6 +1914,7 @@ id_t ExpressionIRPlayer::doProcessType(ITypeInfo * type)
         case type_function:
         case type_pointer:
         case type_array:
+        case type_keyedint:
             {
                 CompoundTypeBuilderInfo info;
                 info.baseType = processType(type->queryChildType());

+ 1 - 0
ecl/hql/hqllex.l

@@ -848,6 +848,7 @@ ONFAIL              { RETURNSYM(ONFAIL); }
 ONLY                { RETURNSYM(ONLY); }
 ONWARNING           { RETURNSYM(ONWARNING); }
 OPT                 { RETURNSYM(OPT); }
+__OPTION__          { RETURNSYM(__OPTION__); }
 ORDERED             { RETURNSYM(ORDERED); }
 OUTER               { RETURNSYM(OUTER); }
 OUTPUT              { 

+ 21 - 6
ecl/hql/hqlutil.cpp

@@ -361,9 +361,7 @@ IHqlExpression * convertIndexPhysical2LogicalValue(IHqlExpression * cur, IHqlExp
     else if (allowTranslate)
     {
         LinkedHqlExpr newValue = physicalSelect;
-
-        OwnedHqlExpr target = createSelectExpr(getActiveTableSelector(), LINK(cur));            // select not used, just created to get correct types.
-        ITypeInfo * type = target->queryType();
+        ITypeInfo * type = cur->queryType();
         type_t tc = type->getTypeCode();
         if (tc == type_int || tc == type_swapint)
         {
@@ -397,11 +395,28 @@ static IHqlExpression * mapIfBlockRecord(HqlMapTransformer & mapper, IHqlExpress
 }
 
 
+static IHqlExpression * mapIfBlockCondition(HqlMapTransformer & mapper, IHqlExpression * expr)
+{
+    if (expr->getOperator() == no_select)
+    {
+        OwnedHqlExpr mapped = mapper.transformRoot(expr);
+        //Need to apply biases to fields that are keyed and ensure they are correctly cast
+        if (mapped->queryType() != expr->queryType())
+            return convertIndexPhysical2LogicalValue(expr, mapped, true);
+        return mapped.getClear();
+    }
+
+    HqlExprArray args;
+    ForEachChild(i, expr)
+        args.append(*mapIfBlockCondition(mapper, expr->queryChild(i)));
+    return expr->clone(args);
+}
+
 static IHqlExpression * mapIfBlock(HqlMapTransformer & mapper, IHqlExpression * cur)
 {
     HqlExprArray args;
     unwindChildren(args, cur);
-    args.replace(*mapper.transformRoot(&args.item(0)), 0);
+    args.replace(*mapIfBlockCondition(mapper, &args.item(0)), 0);
     args.replace(*mapIfBlockRecord(mapper, &args.item(1)), 1);
     return cur->clone(args);
 }
@@ -443,8 +458,8 @@ IHqlExpression * createPhysicalIndexRecord(HqlMapTransformer & mapper, IHqlExpre
                 Owned<ITypeInfo> hozedType = getHozedKeyType(cur);
                 if (hozedType == cur->queryType())
                     newField = LINK(cur);
-                else if (createKeyedTypes)
-                    newField = createField(cur->queryId(), makeKeyedType(cur->getType()), nullptr, extractFieldAttrs(cur));
+                else if (createKeyedTypes && hozedType->isInteger())
+                    newField = createField(cur->queryId(), makeKeyedIntType(cur->getType()), nullptr, extractFieldAttrs(cur));
                 else
                     newField = createField(cur->queryId(), hozedType.getClear(), nullptr, extractFieldAttrs(cur));
             }

+ 1 - 1
ecl/hqlcpp/hqlcerrors.hpp

@@ -527,7 +527,7 @@
 #define HQLERR_IncompatibleKeyedSubString_Text  "Cannot use two different KEYED substring filters for field %s in key %s"
 #define HQLERR_NonNullChildDSDefault_Text       "Non-null child dataset may not be used as default value (target field '%s')"
 #define HQLERR_AttributeXMustBeConstant_Text    "Attribute %s must be set to a constant value"
-#define HQLERR_CannotInterpretRecord_Text       "This dataset contains deprecated record formats and virtual fields.  Remove the alien data types, or temporarily add VIRTUAL(LEGACY) to the table definition"
+#define HQLERR_CannotInterpretRecord_Text       "This dataset contains deprecated record formats and virtual fields.  Remove the alien data types, or temporarily add __OPTION__(LEGACY) to the table definition"
 
 //Warnings.
 #define HQLWRN_CannotRecreateDistribution_Text  "Cannot recreate the distribution for a persistent dataset"

+ 3 - 1
ecl/hqlcpp/hqlcpp.cpp

@@ -1814,6 +1814,7 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.newDiskReadMapping, "newDiskReadMapping", true),
         DebugOption(options.transformNestedSequential, "transformNestedSequential", true),
         DebugOption(options.forceAllProjectedDiskSerialized, "internalForceAllProjectedDiskSerialized", false),  // Delete in 8.0 once new code has been proved in anger
+        DebugOption(options.newIndexReadMapping, "newIndexReadMapping", false), // Not yet enabled due to problems with merging mapped fields and roxie/thor integration
     };
 
     //get options values from workunit
@@ -6850,6 +6851,7 @@ void HqlCppTranslator::doBuildExprCast(BuildCtx & ctx, ITypeInfo * to, CHqlBound
                         break;
                     }
                 case type_swapint:
+                case type_keyedint:
                     {
                         unsigned toSize = to->getSize();
                         unsigned fromSize = from->getSize();
@@ -11147,7 +11149,7 @@ void HqlCppTranslator::assignAndCast(BuildCtx & ctx, const CHqlBoundTarget & tar
     case type_packedint:
         {
             unsigned fromSize = from->getSize();
-            if ((fromType == type_swapint) && !((fromSize == 1) && (toSize == 1)))
+            if ((fromType == type_swapint || fromType == type_keyedint) && !((fromSize == 1) && (toSize == 1)))
             {
                 if (fromSize != toSize)
                 {

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -811,6 +811,7 @@ struct HqlCppOptions
     bool                newDiskReadMapping;
     bool                transformNestedSequential;
     bool                forceAllProjectedDiskSerialized;
+    bool                newIndexReadMapping;
 };
 
 //Any information gathered while processing the query should be moved into here, rather than cluttering up the translator class

+ 141 - 52
ecl/hqlcpp/hqlsource.cpp

@@ -402,6 +402,16 @@ static IHqlExpression * nextDiskField(IHqlExpression * diskRecord, unsigned & di
     }
 }
 
+static IHqlExpression * queryOriginalKey(IHqlExpression * expr)
+{
+    IHqlExpression * original = queryAttributeChild(expr, _original_Atom, 0);
+    if (original)
+        return original;
+    else
+        return expr;
+}
+
+
 
 static void createPhysicalLogicalAssigns(HqlExprArray & assigns, IHqlExpression * self, IHqlExpression * diskRecord, IHqlExpression * record, IHqlExpression * diskDataset, bool allowTranslate, unsigned fileposIndex)
 {
@@ -475,11 +485,11 @@ IHqlExpression * HqlCppTranslator::convertToPhysicalIndex(IHqlExpression * table
     IHqlExpression * record = tableExpr->queryRecord();
 
     HqlMapTransformer mapper;
-    bool hasFileposition = getBoolAttribute(tableExpr, filepositionAtom, true);
-    IHqlExpression * diskRecord = createPhysicalIndexRecord(mapper, record, hasFileposition, false);
+    bool hasFilePosition = getBoolAttribute(tableExpr, filepositionAtom, true);
+    IHqlExpression * diskRecord = createPhysicalIndexRecord(mapper, record, hasFilePosition, false);
 
     unsigned payload = numPayloadFields(tableExpr);
-    assertex(payload || !hasFileposition);
+    assertex(payload || !hasFilePosition);
     HqlExprArray args;
     unwindChildren(args, tableExpr);
     args.replace(*diskRecord, 1);
@@ -491,7 +501,7 @@ IHqlExpression * HqlCppTranslator::convertToPhysicalIndex(IHqlExpression * table
     IHqlExpression * newDataset = createDataset(tableExpr->getOperator(), args);
 
     HqlExprArray assigns;
-    createPhysicalLogicalAssigns(assigns, newDataset, tableExpr, hasFileposition);
+    createPhysicalLogicalAssigns(assigns, newDataset, tableExpr, hasFilePosition);
     OwnedHqlExpr projectedTable = createDataset(no_newusertable, newDataset, createComma(LINK(record), createValue(no_newtransform, makeTransformType(record->getType()), assigns)));
     physicalIndexCache.setValue(tableExpr, projectedTable);
     return projectedTable.getClear();
@@ -511,8 +521,26 @@ IHqlExpression * convertToPhysicalTable(IHqlExpression * tableExpr, bool ensureS
 IHqlExpression * HqlCppTranslator::buildIndexFromPhysical(IHqlExpression * expr)
 {
     IHqlExpression * tableExpr = queryPhysicalRootTable(expr);
+    OwnedHqlExpr newProject;
+    if (queryOptions().newIndexReadMapping && !recordContainsBlobs(tableExpr->queryRecord()))
+    {
+        //once it is legal for the input to a transform to be non-serialized then following should be enabled
+        //return LINK(expr);
+
+        IHqlExpression * record = tableExpr->queryChild(1);
+        OwnedHqlExpr diskRecord = getSerializedForm(record, diskAtom);
+        if (record == diskRecord)
+            return LINK(expr);
 
-    OwnedHqlExpr newProject = convertToPhysicalIndex(tableExpr);
+        OwnedHqlExpr newDataset = replaceChild(tableExpr, 1, diskRecord);
+
+        VirtualRecordTransformCreator mapper(newDataset);
+        IHqlExpression * newTransform = mapper.createMappingTransform(no_newtransform, record, newDataset);
+        newProject.setown(createDatasetF(no_newusertable, LINK(newDataset), LINK(record), newTransform, createAttribute(_internal_Atom), NULL));
+        newProject.setown(tableExpr->cloneAllAnnotations(newProject));
+    }
+    else
+        newProject.setown(convertToPhysicalIndex(tableExpr));
     return replaceExpression(expr, tableExpr, newProject);
 }
 
@@ -596,11 +624,18 @@ public:
 
 //---------------------------------------------------------------------------
 
+static bool forceLegacyMapping(IHqlExpression * expr)
+{
+    //Use __OPTION__(LEGACY(TRUE)) to force legacy mapping code
+    IHqlExpression * options = expr->queryAttribute(__option__Atom);
+    return getBoolAttribute(options, legacyAtom, false);
+}
+
 class SourceBuilder
 {
 public:
     SourceBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
-        : tableExpr(_tableExpr), translator(_translator)
+        : tableExpr(_tableExpr), translator(_translator), newInputMapping(false)
     { 
         nameExpr.setown(foldHqlExpression(_nameExpr));
         needDefaultTransform = true; 
@@ -633,20 +668,27 @@ public:
 
         if (tableExpr)
         {
-            IHqlExpression * virtualAttr = tableExpr->queryAttribute(virtualAtom);
-            newDiskReadMapping = translator.queryOptions().newDiskReadMapping;
-            if (virtualAttr)
-                newDiskReadMapping = !getBoolAttribute(virtualAttr, legacyAtom);
+            if (isKey(tableExpr))
+                newInputMapping = translator.queryOptions().newIndexReadMapping;
+            else
+                newInputMapping = translator.queryOptions().newDiskReadMapping;
+
+            if (forceLegacyMapping(tableExpr))
+                newInputMapping = false;
+
+            //If this index has been translated using the legacy method then ensure we continue to use that method
+            if (isKey(tableExpr) && queryAttributeChild(tableExpr, _original_Atom, 0))
+                newInputMapping = false;
             switch (tableExpr->getOperator())
             {
             case no_fetch:
             case no_compound_fetch:
-                newDiskReadMapping = false;
+                newInputMapping = false;
                 break;
             }
         }
         else
-            newDiskReadMapping = false;
+            newInputMapping = false;
     }
     virtual ~SourceBuilder() {}
 
@@ -671,6 +713,8 @@ public:
     bool containsStepping(IHqlExpression * expr);
     ABoundActivity * buildActivity(BuildCtx & ctx, IHqlExpression * expr, ThorActivityKind activityKind, const char *kind, ABoundActivity *input);
     void gatherVirtualFields(bool ignoreVirtuals, bool ensureSerialized);
+    void deduceDiskRecords();
+    void deduceIndexRecords();
     bool recordHasVirtuals()                                { return fieldInfo.hasVirtuals(); }
     bool recordHasVirtualsOrDeserialize()                               { return fieldInfo.hasVirtualsOrDeserialize(); }
     bool isSourceInvariant(IHqlExpression * dataset, IHqlExpression * expr);
@@ -728,6 +772,7 @@ public:
     HqlExprAttr     failedFilterValue;
     HqlExprAttr     compoundCountVar;
     HqlExprAttr     physicalRecord;
+    HqlExprAttr     inputRecord;        // The format of the row that is passed to the transform
     LinkedHqlExpr   expectedRecord;
     LinkedHqlExpr   projectedRecord;
     LinkedHqlExpr   tableSelector;
@@ -754,7 +799,7 @@ public:
     bool            isUnfilteredCount;
     bool            isVirtualLogicalFilenameUsed;
     bool            requiresOrderedMerge;
-    bool            newDiskReadMapping;
+    bool            newInputMapping;
     bool            extractCanMatch = false;
 protected:
     HqlCppTranslator & translator;
@@ -864,11 +909,11 @@ void SourceBuilder::analyse(IHqlExpression * expr)
     {
     case no_table:
     case no_newkeyindex:
-        if (!newDiskReadMapping)
+        if (!newInputMapping)
         {
             assertex(!fieldInfo.hasVirtuals());
         }
-        if (newDiskReadMapping)
+        if (newInputMapping)
         {
             if (!tableExpr->hasAttribute(_noVirtual_Atom) && (tableExpr->queryChild(2)->getOperator() != no_pipe))
             {
@@ -1254,7 +1299,7 @@ void SourceBuilder::buildTransformElements(BuildCtx & ctx, IHqlExpression * expr
     {
     case no_newkeyindex:
     case no_table:
-        if (newDiskReadMapping)
+        if (newInputMapping)
         {
         }
         else
@@ -2025,19 +2070,8 @@ ABoundActivity * SourceBuilder::buildActivity(BuildCtx & ctx, IHqlExpression * e
             case TAKindexgroupaggregate:
             case TAKindexexists:
             {
-                LinkedHqlExpr indexExpr = queryAttributeChild(tableExpr, _original_Atom, 0);
-                OwnedHqlExpr serializedRecord;
-                unsigned numPayload = numPayloadFields(indexExpr);
-                if (numPayload)
-                    serializedRecord.setown(notePayloadFields(indexExpr->queryRecord(), numPayload));
-                else
-                    serializedRecord.set(indexExpr->queryRecord());
-                serializedRecord.setown(getSerializedForm(serializedRecord, diskAtom));
-
-                bool hasFilePosition = getBoolAttribute(indexExpr, filepositionAtom, true);
-                serializedRecord.setown(createMetadataIndexRecord(serializedRecord, hasFilePosition));
-                translator.buildMetaMember(instance->classctx, serializedRecord, false, "queryDiskRecordSize");
-                translator.buildMetaMember(instance->classctx, serializedRecord, false, "queryProjectedDiskRecordSize");
+                translator.buildMetaMember(instance->classctx, expectedRecord, false, "queryDiskRecordSize");
+                translator.buildMetaMember(instance->classctx, projectedRecord, false, "queryProjectedDiskRecordSize");
                 break;
             }
             }
@@ -2556,7 +2590,7 @@ void SourceBuilder::buildCanMatch(IHqlExpression * expr)
         MemberFunction func(translator, instance->startctx);
         func.start("virtual bool canMatch(const void * _left) override");
         func.ctx.addQuotedLiteral("unsigned char * left = (unsigned char *)_left;");
-        if (newDiskReadMapping)
+        if (newInputMapping)
             translator.bindTableCursor(func.ctx, projectedSelector, "left");
         else
             translator.bindTableCursor(func.ctx, tableExpr, "left");
@@ -2627,7 +2661,22 @@ void SourceBuilder::gatherVirtualFields(bool ignoreVirtuals, bool ensureSerializ
     else
         physicalRecord.set(record);
 
-    if (newDiskReadMapping)
+    expectedRecord.set(physicalRecord);
+    projectedRecord.set(physicalRecord);
+
+    tableSelector.set(tableExpr->queryNormalizedSelector());
+    projectedSelector.set(tableSelector);
+}
+
+void SourceBuilder::deduceDiskRecords()
+{
+    HqlExprAttr mode = tableExpr->queryChild(2);
+    node_operator modeOp = mode->getOperator();
+    bool isPiped = modeOp==no_pipe;
+
+    gatherVirtualFields(tableExpr->hasAttribute(_noVirtual_Atom) || isPiped, needToSerializeRecord(modeOp));
+
+    if (newInputMapping)
     {
         projectedRecord.set(tableExpr->queryRecord());
         expectedRecord.setown(getSerializedForm(physicalRecord, diskAtom));
@@ -2643,11 +2692,39 @@ void SourceBuilder::gatherVirtualFields(bool ignoreVirtuals, bool ensureSerializ
     }
     else
     {
-        expectedRecord.set(physicalRecord);
-        projectedRecord.set(physicalRecord);
+        projectedRecord.set(tableExpr->queryRecord());
+        expectedRecord.setown(getSerializedForm(physicalRecord, diskAtom));
+    }
+}
+
+
+void SourceBuilder::deduceIndexRecords()
+{
+    gatherVirtualFields(true, true);
+
+    //A slightly round about way to get the meta including keyed/blob information for the physical file.
+    IHqlExpression * indexExpr = queryOriginalKey(tableExpr);
+    OwnedHqlExpr serializedRecord;
+    unsigned numPayload = numPayloadFields(indexExpr);
+    if (numPayload)
+        serializedRecord.setown(notePayloadFields(indexExpr->queryRecord(), numPayload));
+    else
+        serializedRecord.set(indexExpr->queryRecord());
+    serializedRecord.setown(getSerializedForm(serializedRecord, diskAtom));
+
+    bool hasFilePosition = getBoolAttribute(indexExpr, filepositionAtom, true);
+    expectedRecord.setown(createMetadataIndexRecord(serializedRecord, hasFilePosition));
+
+    if (newInputMapping)
+    {
+        //We are expecting the translator to map the field values, this uses the expected ecl structure
+        projectedRecord.set(tableExpr->queryRecord());
+        //physical?
+    }
+    else
+    {
+        projectedRecord.set(expectedRecord);    // This needs to match expectedRecord so that no translation occurs on keyed fields etc.
     }
-    tableSelector.set(tableExpr->queryNormalizedSelector());
-    projectedSelector.set(tableSelector);
 }
 
 
@@ -2825,7 +2902,7 @@ void DiskReadBuilderBase::buildMembers(IHqlExpression * expr)
         OwnedHqlExpr noVirtualRecord = removeVirtualAttributes(expectedRecord);
         translator.buildFormatCrcFunction(instance->classctx, "getDiskFormatCrc", noVirtualRecord);
 
-        if (newDiskReadMapping)
+        if (newInputMapping)
             translator.buildFormatCrcFunction(instance->classctx, "getProjectedFormatCrc", projectedRecord);
         else
             translator.buildFormatCrcFunction(instance->classctx, "getProjectedFormatCrc", noVirtualRecord);
@@ -2966,7 +3043,7 @@ protected:
 void DiskReadBuilder::analyseGraph(IHqlExpression * expr)
 {
     DiskReadBuilderBase::analyseGraph(expr);
-    if (newDiskReadMapping && extractCanMatch && firstTransformer)
+    if (newInputMapping && extractCanMatch && firstTransformer)
     {
         //Calculate the minimum set of fields required by any post-filters and projects.
         projectedRecord.setown(getMinimumInputRecord(translator, firstTransformer));
@@ -3110,10 +3187,10 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskRead(BuildCtx & ctx, IHqlE
     bool isPiped = modeOp==no_pipe;
 
     DiskReadBuilder info(*this, tableExpr, tableExpr->queryChild(0));
-    info.gatherVirtualFields(tableExpr->hasAttribute(_noVirtual_Atom) || isPiped, needToSerializeRecord(modeOp));
+    info.deduceDiskRecords();
 
     unsigned optFlags = (options.foldOptimized ? HOOfold : 0);
-    if (info.newDiskReadMapping && (modeOp != no_csv) && (modeOp != no_xml) && (modeOp != no_pipe))
+    if (info.newInputMapping && (modeOp != no_csv) && (modeOp != no_xml) && (modeOp != no_pipe))
     {
         //The projected disk information (which is passed to the transform) uses the in memory format IFF
         // - The disk read is a trivial slimming transform (so no transform needs calling on the projected disk format.
@@ -3232,7 +3309,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskNormalize(BuildCtx & ctx,
     assertex(mode->getOperator()!=no_pipe);
 
     DiskNormalizeBuilder info(*this, tableExpr, tableExpr->queryChild(0));
-    info.gatherVirtualFields(tableExpr->hasAttribute(_noVirtual_Atom), needToSerializeRecord(mode));
+    info.deduceDiskRecords();
 
     LinkedHqlExpr transformed = expr;
     if (info.recordHasVirtualsOrDeserialize())
@@ -3391,7 +3468,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskAggregate(BuildCtx & ctx,
     assertex(mode->getOperator()!=no_pipe);
 
     DiskAggregateBuilder info(*this, tableExpr, tableExpr->queryChild(0));
-    info.gatherVirtualFields(tableExpr->hasAttribute(_noVirtual_Atom), needToSerializeRecord(mode));
+    info.deduceDiskRecords();
 
     LinkedHqlExpr transformed = expr;
     if (info.recordHasVirtualsOrDeserialize())
@@ -3405,7 +3482,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskAggregate(BuildCtx & ctx,
     if (aggOp == no_countgroup || aggOp == no_existsgroup)
     {
         DiskCountBuilder info(*this, tableExpr, tableExpr->queryChild(0), aggOp);
-        info.gatherVirtualFields(tableExpr->hasAttribute(_noVirtual_Atom), needToSerializeRecord(mode));
+        info.deduceDiskRecords();
 
         return info.buildActivity(ctx, expr, TAKdiskcount, "DiskCount", NULL);
     }
@@ -3469,7 +3546,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskGroupAggregate(BuildCtx &
     assertex(mode->getOperator()!=no_pipe);
 
     DiskGroupAggregateBuilder info(*this, tableExpr, tableExpr->queryChild(0));
-    info.gatherVirtualFields(tableExpr->hasAttribute(_noVirtual_Atom), needToSerializeRecord(mode));
+    info.deduceDiskRecords();
 
     LinkedHqlExpr transformed = expr;
     if (info.recordHasVirtualsOrDeserialize())
@@ -3821,9 +3898,21 @@ void IndexReadBuilderBase::buildMembers(IHqlExpression * expr)
 
     buildKeyedLimitHelper(expr);
 
-    translator.buildFormatCrcFunction(instance->classctx, "getDiskFormatCrc", true, tableExpr, tableExpr, 1);
-    translator.buildFormatCrcFunction(instance->classctx, "getProjectedFormatCrc", true, tableExpr, tableExpr, 1);
-    IHqlExpression * originalKey = queryAttributeChild(tableExpr, _original_Atom, 0);
+    LinkedHqlExpr diskRecord = tableExpr->queryRecord();
+    if (newInputMapping)
+    {
+        HqlMapTransformer mapper;
+        bool hasFilePosition = getBoolAttribute(tableExpr, filepositionAtom, true);
+        diskRecord.setown(createPhysicalIndexRecord(mapper, diskRecord, hasFilePosition, false));
+    }
+
+    translator.buildFormatCrcFunction(instance->classctx, "getDiskFormatCrc", true, diskRecord, tableExpr, 1);
+    if (newInputMapping || (!tableExpr || !isKey(tableExpr)))
+        translator.buildFormatCrcFunction(instance->classctx, "getProjectedFormatCrc", projectedRecord);
+    else
+        translator.buildFormatCrcFunction(instance->classctx, "getProjectedFormatCrc", true, diskRecord, tableExpr, 1); // backward compatibility for indexes
+
+    IHqlExpression * originalKey = queryOriginalKey(tableExpr);
     translator.buildSerializedLayoutMember(instance->classctx, originalKey->queryRecord(), "getIndexLayout", numKeyedFields(originalKey));
 
     //Note the helper base class contains code like the following
@@ -4012,7 +4101,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityIndexRead(BuildCtx & ctx, IHql
     traceExpression("before index read", optimized);
     assertex(tableExpr->getOperator() == no_newkeyindex);
     NewIndexReadBuilder info(*this, tableExpr, tableExpr->queryChild(3));
-    info.gatherVirtualFields(true, true);
+    info.deduceIndexRecords();
     if (info.containsStepping(optimized))
         return info.buildActivity(ctx, optimized, TAKindexread, "SteppedIndexRead", NULL);
     return info.buildActivity(ctx, optimized, TAKindexread, "IndexRead", NULL);
@@ -4086,7 +4175,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityIndexNormalize(BuildCtx & ctx,
 
     assertex(tableExpr->getOperator() == no_newkeyindex);
     IndexNormalizeBuilder info(*this, tableExpr, tableExpr->queryChild(3));
-    info.gatherVirtualFields(true, true);
+    info.deduceIndexRecords();
     return info.buildActivity(ctx, optimized, TAKindexnormalize, "IndexNormalize", NULL);
 }
 
@@ -4245,13 +4334,13 @@ ABoundActivity * HqlCppTranslator::doBuildActivityIndexAggregate(BuildCtx & ctx,
     if (aggOp == no_countgroup || aggOp == no_existsgroup)
     {
         IndexCountBuilder info(*this, tableExpr, tableExpr->queryChild(3), aggOp);
-        info.gatherVirtualFields(true, true);
+        info.deduceIndexRecords();
         return info.buildActivity(ctx, optimized, TAKindexcount, "IndexCount", NULL);
     }
     else
     {
         IndexAggregateBuilder info(*this, tableExpr, tableExpr->queryChild(3));
-        info.gatherVirtualFields(true, true);
+        info.deduceIndexRecords();
         return info.buildActivity(ctx, optimized, TAKindexaggregate, "IndexAggregate", NULL);
     }
 }
@@ -4405,7 +4494,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityIndexGroupAggregate(BuildCtx &
     ThorActivityKind tak = TAKindexgroupaggregate;
     assertex(tableExpr->getOperator() == no_newkeyindex);
     IndexGroupAggregateBuilder info(*this, tableExpr, tableExpr->queryChild(3));
-    info.gatherVirtualFields(true, true);
+    info.deduceIndexRecords();
     return info.buildActivity(ctx, optimized, tak, "IndexGroupAggregate", NULL);
 }
 
@@ -4832,7 +4921,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityFetch(BuildCtx & ctx, IHqlExpr
     if (!tableExpr)
         throwError(HQLERR_FetchNonDiskfile);
     FetchBuilder info(*this, tableExpr, tableExpr->queryChild(0), expr);
-    info.gatherVirtualFields(false, true);//?needToSerializeRecord(mode)
+    info.deduceDiskRecords();//?needToSerializeRecord(mode)
 
     unsigned optFlags = (options.foldOptimized ? HOOfold : 0);
     if (info.recordHasVirtualsOrDeserialize())

+ 2 - 0
ecl/hthor/hthor.ipp

@@ -162,6 +162,8 @@ static bool verifyFormatCrc(unsigned helperCrc, IDistributedFile * df, char cons
             if(fail)
                 throw MakeStringException(0, "%s", msg.str());
             WARNLOG("%s", msg.str());
+            //MORE: Should we add a warning, similar to the following:
+            //agent.addWuException(msg.str(), WRN_UseLayoutTranslation, SeverityWarning, "hthor");
             return false;
         }
     }

+ 31 - 32
ecl/hthor/hthorkey.cpp

@@ -665,56 +665,55 @@ void CHThorIndexReadActivityBase::getLayoutTranslators()
         {
             IDistributedFile & f = superIterator->query();
             layoutTrans.setown(getLayoutTranslator(&f));
-            if(layoutTrans)
-            {
-                StringBuffer buff;
-                buff.append("Using record layout translation to correct layout mismatch on reading index ").append(f.queryLogicalName());
-                WARNLOG("%s", buff.str());
-                agent.addWuException(buff.str(), WRN_UseLayoutTranslation, SeverityWarning, "hthor");
-            }
             layoutTransArray.append(layoutTrans.getClear());
         } while(superIterator->next());
     }
     else
     {
         layoutTrans.setown(getLayoutTranslator(df));
-        if(layoutTrans)
-        {
-            StringBuffer buff;
-            buff.append("Using record layout translation to correct layout mismatch on reading index ").append(df->queryLogicalName());
-            WARNLOG("%s", buff.str());
-            agent.addWuException(buff.str(), WRN_UseLayoutTranslation, SeverityWarning, "hthor");
-        }
     }
 }
 
 const IDynamicTransform * CHThorIndexReadActivityBase::getLayoutTranslator(IDistributedFile * f)
 {
-    if(getLayoutTranslationMode() == RecordTranslationMode::AlwaysECL)
-        return NULL;
+    IOutputMetaData * expectedFormat = helper.queryDiskRecordSize();
+    Linked<IOutputMetaData> actualFormat = expectedFormat;
 
-    if(getLayoutTranslationMode() == RecordTranslationMode::None)
+    switch (getLayoutTranslationMode())
     {
+    case RecordTranslationMode::AlwaysECL:
+        break;
+    case RecordTranslationMode::None:
         verifyFormatCrc(helper.getDiskFormatCrc(), f, (superIterator ? superName.str() : NULL) , true, true);
-        return NULL;
+        break;
+    default:
+        if(!verifyFormatCrc(helper.getDiskFormatCrc(), f, (superIterator ? superName.str() : NULL) , true, false))
+        {
+            IPropertyTree &props = f->queryAttributes();
+            actualFormat.setown(getDaliLayoutInfo(props));
+            if (!actualFormat)
+                throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s - key layout information not found", f->queryLogicalName());
+
+            //MORE: We could introduce a more efficient way of checking this that does not create a translator
+            Owned<const IDynamicTransform> actualTranslator = createRecordTranslator(expectedFormat->queryRecordAccessor(true), actualFormat->queryRecordAccessor(true));
+            if (actualTranslator->keyedTranslated())
+                throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s - keyed fields do not match", f->queryLogicalName());
+
+            actualLayouts.append(actualFormat.getLink());  // ensure adequate lifespan
+        }
+        break;
     }
 
-    if(verifyFormatCrc(helper.getDiskFormatCrc(), f, (superIterator ? superName.str() : NULL) , true, false))
-        return NULL;
+    IOutputMetaData * projectedFormat = helper.queryProjectedDiskRecordSize();
+    if (projectedFormat == actualFormat)
+        return nullptr;
 
-    IPropertyTree &props = f->queryAttributes();
-    Owned<IOutputMetaData> actualFormat = getDaliLayoutInfo(props);
-    if (actualFormat)
-    {
-        actualLayouts.append(actualFormat.getLink());  // ensure adequate lifespan
-        Owned<const IDynamicTransform> payloadTranslator =  createRecordTranslator(helper.queryProjectedDiskRecordSize()->queryRecordAccessor(true), actualFormat->queryRecordAccessor(true));
-        if (!payloadTranslator->canTranslate())
-            throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s", f->queryLogicalName());
-        if (payloadTranslator->keyedTranslated())
-            throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s - keyed fields do not match", f->queryLogicalName());
+    Owned<const IDynamicTransform> payloadTranslator =  createRecordTranslator(projectedFormat->queryRecordAccessor(true), actualFormat->queryRecordAccessor(true));
+    if (!payloadTranslator->canTranslate())
+        throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s", f->queryLogicalName());
+    if (payloadTranslator->needsTranslate())
         return payloadTranslator.getClear();
-    }
-    throw MakeStringException(0, "Untranslatable key layout mismatch reading index %s - key layout information not found", f->queryLogicalName());
+    return nullptr;
 }
 
 void CHThorIndexReadActivityBase::verifyIndex(IKeyIndex * idx)

+ 1 - 1
ecl/regress/jpg.ecl

@@ -46,7 +46,7 @@ END;
 
 
 
-d := dataset('victor::imga', Layout_imgdb, flat, virtual(legacy));
+d := dataset('victor::imga', Layout_imgdb, flat, __OPTION__(legacy));
 
 // e := distribute(d, RANDOM());
 // output(e,,'victor::imga.dist', overwrite);

+ 1 - 1
ecl/regress/nestvstr.ecl

@@ -38,7 +38,7 @@ end;
 d1 := dataset([{'id1', '20030911', 5, x'1234567890'}, {'id2', '20030910', 3, x'123456'}], rawLayout);
 output(d1,,'imgfile', overwrite);
 
-d := dataset('imgfile', { rawLayout, unsigned8 _fpos{virtual(fileposition)} }, FLAT, virtual(legacy));
+d := dataset('imgfile', { rawLayout, unsigned8 _fpos{virtual(fileposition)} }, FLAT, __OPTION__(legacy));
 i := index(d, { dl, _fpos }, 'imgindex');
 
 buildindex(i, overwrite);

+ 1 - 1
ecl/regress/nestvstr2.ecl

@@ -40,7 +40,7 @@ end;
 d1 := dataset([{'id1', '20030911', 5, x'1234567890'}, {'id2', '20030910', 3, x'123456'}], rawLayout);
 //output(d1,,'imgfile', overwrite);
 
-d := dataset('imgfile', { rawLayout x, unsigned8 _fpos{virtual(fileposition)} }, FLAT, virtual(legacy));
+d := dataset('imgfile', { rawLayout x, unsigned8 _fpos{virtual(fileposition)} }, FLAT, __OPTION__(legacy));
 i := index(d, { x.dl, _fpos }, 'imgindex');
 
 buildindex(i, overwrite);

+ 1 - 1
ecl/regress/nestvstr3.ecl

@@ -40,5 +40,5 @@ end;
 d1 := dataset([{'id1', '20030911', 5, x'1234567890'}, {'id2', '20030910', 3, x'123456'}], rawLayout);
 //output(d1,,'imgfile', overwrite);
 
-d := dataset('imgfile', { rawLayout x, unsigned8 _fpos{virtual(fileposition)} }, FLAT, virtual(legacy));
+d := dataset('imgfile', { rawLayout x, unsigned8 _fpos{virtual(fileposition)} }, FLAT, __OPTION__(legacy));
 output(d);

+ 7 - 0
testing/regress/ecl/badindex.ecl

@@ -15,9 +15,16 @@
     limitations under the License.
 ############################################################################## */
 
+//version newIndexReadMapping=false
+//version newIndexReadMapping=true,noroxie,nothor
+
 // Testing various indexes that might present tricky cases for remote projection/filtering
 
 import ^ as root;
+newIndexReadMapping := #IFDEFINED(root.newIndexReadMapping, false);
+
+#option ('newIndexReadMapping', newIndexReadMapping);
+
 import $.setup;
 prefix := setup.Files(false, false).IndexPrefix;
 

+ 1 - 1
testing/regress/ecl/imgkey.ecl

@@ -67,7 +67,7 @@ d1 := dataset([
     rawLayout);
 output(d1,,prefix + 'imgfile', overwrite);
 
-d := dataset(prefix + 'imgfile', rawLayout1, FLAT, virtual(legacy));
+d := dataset(prefix + 'imgfile', rawLayout1, FLAT, __option__(legacy(true)));
 i := index(d, keylayout, 'imgindex');
 
 rawtrim := table(d, { dl, date, unsigned2 seq:=0, unsigned2 num := 0, imgLength, _fpos});