فهرست منبع

HPCC-10022 Option to generate a persist for each unique ECL hash

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 11 سال پیش
والد
کامیت
bff28aa94c

+ 11 - 1
common/workunit/workflow.cpp

@@ -162,13 +162,20 @@ public:
     virtual unsigned     queryContingencyFor() const { return tree->getPropInt("@contingencyFor", 0); }
     virtual IStringVal & getPersistName(IStringVal & val) const { val.set(tree->queryProp("@persistName")); return val; }
     virtual unsigned     queryPersistWfid() const { return tree->getPropInt("@persistWfid", 0); }
+    virtual int          queryPersistCopies() const { return tree->getPropInt("@persistCopies", 0); }
     virtual IStringVal & queryCluster(IStringVal & val) const { val.set(tree->queryProp("@cluster")); return val; }
     virtual void         setScheduledNow() { tree->setPropTree("Schedule", createPTree()); setEnum(tree, "@state", WFStateReqd, wfstates); }
     virtual void         setScheduledOn(char const * name, char const * text) { IPropertyTree * stree = createPTree(); stree->setProp("@name", name); stree->setProp("@text", text); tree->setPropTree("Schedule", createPTree())->setPropTree("Event", stree); setEnum(tree, "@state", WFStateWait, wfstates); }
     virtual void         setSchedulePriority(unsigned priority) { assertex(tree->hasProp("Schedule")); tree->setPropInt("Schedule/@priority", priority); }
     virtual void         setScheduleCount(unsigned count) { assertex(tree->hasProp("Schedule")); tree->setPropInt("Schedule/@count", count); tree->setPropInt("Schedule/@countRemaining", count); }
     virtual void         addDependency(unsigned wfid) { tree->addPropTree("Dependency", createPTree())->setPropInt("@wfid", wfid); }
-    virtual void         setPersistInfo(char const * name, unsigned wfid) { tree->setProp("@persistName", name); tree->setPropInt("@persistWfid", wfid); }
+    virtual void         setPersistInfo(char const * name, unsigned wfid, int numPersistInstances)
+    {
+        tree->setProp("@persistName", name);
+        tree->setPropInt("@persistWfid", wfid);
+        if (numPersistInstances != 0)
+            tree->setPropInt("@persistCopies", (int)numPersistInstances);
+    }
     virtual void         setCluster(const char * cluster) { tree->setProp("@cluster", cluster); }
     //info set at run time
     virtual unsigned     queryScheduleCountRemaining() const { assertex(tree->hasProp("Schedule")); return tree->getPropInt("Schedule/@countRemaining"); }
@@ -343,6 +350,7 @@ private:
     SCMStringBuffer persistName;
     SCMStringBuffer clusterName;
     unsigned persistWfid;
+    int persistCopies;
     StringAttr eventName;
     StringAttr eventExtra;
 
@@ -376,6 +384,7 @@ public:
         other->getPersistName(persistName);
         persistWfid = other->queryPersistWfid();
         scheduledWfid = other->queryScheduledWfid();
+        persistCopies = other->queryPersistCopies();
         other->queryCluster(clusterName);
     }
     //info set at compile time
@@ -396,6 +405,7 @@ public:
     virtual unsigned     queryContingencyFor() const { return contingencyFor; }
     virtual IStringVal & getPersistName(IStringVal & val) const { val.set(persistName.str()); return val; }
     virtual unsigned     queryPersistWfid() const { return persistWfid; }
+    virtual int          queryPersistCopies() const { return persistCopies; }
     virtual IStringVal & queryCluster(IStringVal & val) const { val.set(clusterName.str()); return val; }
     //info set at run time
     virtual unsigned     queryScheduleCountRemaining() const { return schedule ? schedule->queryCountRemaining() : 0; }

+ 2 - 1
common/workunit/workunit.hpp

@@ -626,6 +626,7 @@ interface IConstWorkflowItem : extends IInterface
     virtual unsigned queryContingencyFor() const = 0;
     virtual IStringVal & getPersistName(IStringVal & val) const = 0;
     virtual unsigned queryPersistWfid() const = 0;
+    virtual int queryPersistCopies() const = 0;  // 0 - unmangled name,  < 0 - use default, > 0 - max number
     virtual unsigned queryScheduleCountRemaining() const = 0;
     virtual WFState queryState() const = 0;
     virtual unsigned queryRetriesRemaining() const = 0;
@@ -657,7 +658,7 @@ interface IWorkflowItem : extends IRuntimeWorkflowItem
     virtual void setSchedulePriority(unsigned priority) = 0;
     virtual void setScheduleCount(unsigned count) = 0;
     virtual void addDependency(unsigned wfid) = 0;
-    virtual void setPersistInfo(const char * name, unsigned wfid) = 0;
+    virtual void setPersistInfo(const char * name, unsigned wfid, int maxCopies) = 0;
     virtual void syncRuntimeData(const IConstWorkflowItem & other) = 0;
     virtual void setScheduledWfid(unsigned wfid) = 0;
     virtual void setCluster(const char * cluster) = 0;

+ 4 - 0
ecl/hql/hqlatoms.cpp

@@ -94,6 +94,7 @@ IAtom * checkoutAtom;
 IAtom * _childAttr_Atom;
 IAtom * choosenAtom;
 IAtom * clusterAtom;
+IAtom * _codehash_Atom;
 IAtom * _colocal_Atom;
 IAtom * commonAtom;
 IAtom * _complexKeyed_Atom;
@@ -244,6 +245,7 @@ IAtom * minimalAtom;
 IAtom * memoryAtom;
 IAtom * moduleAtom;
 IAtom * mofnAtom;
+IAtom * multipleAtom;
 IAtom * nameAtom;
 IAtom * namedAtom;
 IAtom * namespaceAtom;
@@ -497,6 +499,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKESYSATOM(childAttr);
     MAKEATOM(choosen);
     MAKEATOM(cluster);
+    MAKESYSATOM(codehash);
     MAKESYSATOM(colocal);
     MAKEATOM(common);
     MAKESYSATOM(complexKeyed);
@@ -648,6 +651,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(minimal);
     MAKEATOM(module);
     MAKEATOM(mofn);
+    MAKEATOM(multiple);
     MAKEATOM(name);
     MAKEATOM(named);
     MAKEATOM(namespace);

+ 2 - 0
ecl/hql/hqlatoms.hpp

@@ -96,6 +96,7 @@ extern HQL_API IAtom * checkoutAtom;
 extern HQL_API IAtom * _childAttr_Atom;
 extern HQL_API IAtom * choosenAtom;
 extern HQL_API IAtom * clusterAtom;
+extern HQL_API IAtom * _codehash_Atom;
 extern HQL_API IAtom * _colocal_Atom;
 extern HQL_API IAtom * commonAtom;
 extern HQL_API IAtom * _complexKeyed_Atom;
@@ -247,6 +248,7 @@ extern HQL_API IAtom * minAtom;
 extern HQL_API IAtom * minimalAtom;
 extern HQL_API IAtom * moduleAtom;
 extern HQL_API IAtom * mofnAtom;
+extern HQL_API IAtom * multipleAtom;
 extern HQL_API IAtom * nameAtom;
 extern HQL_API IAtom * namedAtom;
 extern HQL_API IAtom * namespaceAtom;

+ 9 - 1
ecl/hql/hqlgram.y

@@ -187,6 +187,7 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   ENTH
   ENUM
   TOK_ERROR
+  ESCAPE
   EVALUATE
   EVENT
   EVENTEXTRA
@@ -297,6 +298,7 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   MIN
   MODULE
   MOFN
+  MULTIPLE
   NAMED
   NAMEOF
   NAMESPACE
@@ -415,7 +417,6 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   TAN
   TANH
   TERMINATOR
-  ESCAPE
   THEN
   THISNODE
   THOR
@@ -1663,6 +1664,13 @@ persistOpt
     : fewMany
     | expireAttr
     | clusterAttr
+    | SINGLE            {   $$.setExpr(createAttribute(singleAtom), $1); }
+    | MULTIPLE          {   $$.setExpr(createExprAttribute(multipleAtom), $1); }
+    | MULTIPLE '(' expression ')'
+                        {
+                            parser->normalizeExpression($3, type_int, true);
+                            $$.setExpr(createExprAttribute(multipleAtom, $3.getExpr()), $1);
+                        }
     ;
 
 globalOpts

+ 1 - 0
ecl/hql/hqlgram2.cpp

@@ -10269,6 +10269,7 @@ static void getTokenText(StringBuffer & msg, int token)
     case MIN: msg.append("MIN"); break;
     case MODULE: msg.append("MODULE"); break;
     case MOFN: msg.append("MOFN"); break;
+    case MULTIPLE: msg.append("MULTIPLE"); break;
     case NAMED: msg.append("NAMED"); break;
     case NAMEOF: msg.append("__NAMEOF__"); break;
     case NAMESPACE: msg.append("NAMESPACE"); break;

+ 1 - 0
ecl/hql/hqllex.l

@@ -794,6 +794,7 @@ MERGEJOIN           { RETURNSYM(MERGEJOIN); }
 MIN                 { RETURNSYM(MIN); }
 MODULE              { RETURNHARD(MODULE); }
 MOFN                { RETURNSYM(MOFN); }
+MULTIPLE            { RETURNSYM(MULTIPLE); }
 NAMED               { RETURNSYM(NAMED); }
 __NAMEOF__          { RETURNSYM(NAMEOF); }
 NAMESPACE           { RETURNSYM(NAMESPACE); }

+ 0 - 4
ecl/hqlcpp/hqlcatom.cpp

@@ -326,7 +326,6 @@ IIdAtom * failDivideByZeroId;
 IIdAtom * _failId;
 IIdAtom * fileExistsId;
 IIdAtom * finalizeRowClearId;
-IIdAtom * finishPersistId;
 IIdAtom * freeId;
 IIdAtom * freeExceptionId;
 IIdAtom * getBytesFromBuilderId;
@@ -607,7 +606,6 @@ IIdAtom * set2SetXId;
 IIdAtom * sinId;
 IIdAtom * sinhId;
 IIdAtom * sqrtId;
-IIdAtom * startPersistId;
 IIdAtom * str2DataId;
 IIdAtom * str2DataXId;
 IIdAtom * strToQStrId;
@@ -972,7 +970,6 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM-1)
     MAKEID(failDivideByZero);
     MAKEID(fileExists);
     MAKEID(finalizeRowClear);
-    MAKEID(finishPersist);
     MAKEID(free);
     MAKEID(freeException);
     MAKEID(getBytesFromBuilder);
@@ -1281,7 +1278,6 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM-1)
     sinId = createIdAtom("_sin");
     sinhId = createIdAtom("_sinh");
     sqrtId = createIdAtom("_sqrt");
-    MAKEID(startPersist);
     MAKEID(str2Data);
     MAKEID(str2DataX);
     MAKEID(strToQStr);

+ 0 - 2
ecl/hqlcpp/hqlcatom.hpp

@@ -326,7 +326,6 @@ extern IIdAtom * failDivideByZeroId;
 extern IIdAtom * _failId;
 extern IIdAtom * fileExistsId;
 extern IIdAtom * finalizeRowClearId;
-extern IIdAtom * finishPersistId;
 extern IIdAtom * freeId;
 extern IIdAtom * freeExceptionId;
 extern IIdAtom * getBytesFromBuilderId;
@@ -607,7 +606,6 @@ extern IIdAtom * set2SetXId;
 extern IIdAtom * sinId;
 extern IIdAtom * sinhId;
 extern IIdAtom * sqrtId;
-extern IIdAtom * startPersistId;
 extern IIdAtom * str2DataId;
 extern IIdAtom * str2DataXId;
 extern IIdAtom * strToQStrId;

+ 1 - 0
ecl/hqlcpp/hqlcpp.cpp

@@ -1707,6 +1707,7 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.preserveCaseExternalParameter,"preserveCaseExternalParameter",true),
         DebugOption(options.optimizeParentAccess,"optimizeParentAccess",false),
         DebugOption(options.expandPersistInputDependencies,"expandPersistInputDependencies",true),
+        DebugOption(options.multiplePersistInstances,"multiplePersistInstances",true),
     };
 
     //get options values from workunit

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -720,6 +720,7 @@ struct HqlCppOptions
     bool                expandHashJoin;
     bool                traceIR;
     bool                preserveCaseExternalParameter;
+    bool                multiplePersistInstances;
     bool                optimizeParentAccess;
     bool                expandPersistInputDependencies;
 };

+ 0 - 6
ecl/hqlcpp/hqlcppsys.ecl

@@ -613,12 +613,6 @@ const char * cppSystemText[]  = {
 
     "   selectCluster(const varstring src)  : gctxmethod,entrypoint='selectCluster';",
     "   restoreCluster()    : gctxmethod,entrypoint='restoreCluster';",
-    "   startPersist(const varstring src)   : gctxmethod,entrypoint='startPersist';",
-    "   finishPersist() : gctxmethod,entrypoint='finishPersist';",
-
-    "   clearPersist(const varstring name)  : gctxmethod,entrypoint='clearPersist';",
-    "   updatePersist(const varstring name, unsigned4 eclCRC, unsigned8 allCRC) : gctxmethod,entrypoint='updatePersist';",
-    "   checkPersistMatches(const varstring name, unsigned4 eclCRC) : gctxmethod,entrypoint='checkPersistMatches';",
 
     "   integer4 compareUtf8Utf8(const utf8 l, const utf8 r, const varstring codepage) : eclrtl,pure,library='eclrtl',entrypoint='rtlCompareUtf8Utf8';",
     "   integer4 compareUtf8Utf8Strength(const utf8 l, const utf8 r, const varstring loc, unsigned4 str) : eclrtl,pure,library='eclrtl',entrypoint='rtlCompareUtf8Utf8Strength';",

+ 1 - 4
ecl/hqlcpp/hqlhtcpp.cpp

@@ -65,7 +65,6 @@
 #include "eclhelper_base.hpp"
 
 #define MAX_ROWS_OUTPUT_TO_SDS              1000
-#define PERSIST_VERSION                     1           // Increment when implementation is incompatible.
 #define MAX_SAFE_RECORD_SIZE                10000000
 #define DEFAULT_EXPIRY_PERIOD               7
 #define MAX_GRAPH_ECL_LENGTH                1000
@@ -17233,7 +17232,6 @@ void HqlCppTranslator::buildWorkflowItem(BuildCtx & ctx, IHqlStmt * switchStmt,
 
 void HqlCppTranslator::buildWorkflowPersistCheck(BuildCtx & ctx, IHqlExpression * expr)
 {
-    IHqlExpression * original = queryAttributeChild(expr, _original_Atom, 0);
 
     OwnedHqlExpr resultName = ::createResultName(queryAttributeChild(expr, namedAtom, 0));
     resultName.setown(ensureExprType(resultName, unknownVarStringType));
@@ -17249,8 +17247,7 @@ void HqlCppTranslator::buildWorkflowPersistCheck(BuildCtx & ctx, IHqlExpression
     if (resultsRead)
         unwindChildren(dependencies.resultsRead, resultsRead);
 
-    unsigned crc = getExpressionCRC(original) + PERSIST_VERSION;
-    OwnedHqlExpr crcVal = getSizetConstant(crc);
+    IHqlExpression *  crcVal = queryAttributeChild(expr, _codehash_Atom, 0);
     OwnedHqlExpr crcExpr = calculatePersistInputCrc(ctx, dependencies);
     HqlExprArray args;
     args.append(*LINK(resultName));

+ 49 - 16
ecl/hqlcpp/hqlttcpp.cpp

@@ -47,6 +47,7 @@
 //#define REMOVE_NAMED_SCALARS
 //#define OPTIMIZE_IMPLICIT_CAST
 
+#define PERSIST_VERSION                     1           // Increment when implementation is incompatible.
 #define REMOVE_GLOBAL_ANNOTATION                    // This should improve cse.  It currently does for some, but not others...
 
 #define DEFAULT_FOLD_OPTIONS    HFOfoldfilterproject
@@ -131,15 +132,23 @@ struct GlobalAttributeInfo
 {
 public:
     GlobalAttributeInfo(const char * _filePrefix, const char * _storedPrefix, IHqlExpression * _value) : value(_value)
-    { setOp = no_none; persistOp = no_none; few = false; filePrefix = _filePrefix; storedPrefix = _storedPrefix; }
+    {
+        setOp = no_none;
+        persistOp = no_none;
+        few = false;
+        filePrefix = _filePrefix;
+        storedPrefix = _storedPrefix;
+        numPersistInstances = 0;
+    }
 
     void extractGlobal(IHqlExpression * global, ClusterType platform);
-    void extractStoredInfo(IHqlExpression * expr, IHqlExpression * originalValue, bool isRoxie);
+    void extractStoredInfo(IHqlExpression * expr, IHqlExpression * codehash, bool isRoxie, bool multiplePersistInstances);
     void checkFew(HqlCppTranslator & translator);
     void splitGlobalDefinition(ITypeInfo * type, IHqlExpression * value, IConstWorkUnit * wu, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput, bool isRoxie);
     IHqlExpression * getStoredKey();
     void preventDiskSpill() { few = true; }
     IHqlExpression * queryCluster() const { return cluster; }
+    int queryMaxPersistCopies() const { return numPersistInstances; }
 
 protected:
     void doSplitGlobalDefinition(ITypeInfo * type, IHqlExpression * value, IConstWorkUnit * wu, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput, bool isRoxie);
@@ -162,8 +171,10 @@ protected:
     OwnedHqlExpr cluster;
     OwnedHqlExpr extraSetAttr;
     OwnedHqlExpr extraOutputAttr;
+    OwnedHqlExpr codehash;
     const char * filePrefix;
     const char * storedPrefix;
+    int numPersistInstances;
     bool few;
 };
 
@@ -4825,7 +4836,7 @@ void GlobalAttributeInfo::extractGlobal(IHqlExpression * global, ClusterType pla
     persistOp = no_global;
 }
 
-void GlobalAttributeInfo::extractStoredInfo(IHqlExpression * expr, IHqlExpression * originalValue, bool isRoxie)
+void GlobalAttributeInfo::extractStoredInfo(IHqlExpression * expr, IHqlExpression * _codehash, bool isRoxie, bool multiplePersistInstances)
 {
     node_operator op = expr->getOperator();
     few = expr->hasAttribute(fewAtom) || (isRoxie) || (value->isDictionary() && !expr->hasAttribute(manyAtom));
@@ -4844,13 +4855,29 @@ void GlobalAttributeInfo::extractStoredInfo(IHqlExpression * expr, IHqlExpressio
         extraSetAttr.setown(createAttribute(checkpointAtom));
         break;
     case no_persist:
+        assertex(_codehash);
+        codehash.set(_codehash);
         setOp = no_ensureresult;
         storedName.set(expr->queryChild(0));
         sequence.setown(getGlobalSequenceNumber());
-        extraSetAttr.setown(createAttribute(_workflowPersist_Atom, LINK(originalValue)));
+        extraSetAttr.setown(createAttribute(_workflowPersist_Atom, LINK(codehash)));
         setCluster(queryRealChild(expr, 1));
         few = expr->hasAttribute(fewAtom);       // PERSISTs need a consistent format.
         extraOutputAttr.setown(createComma(LINK(expr->queryAttribute(expireAtom)), LINK(expr->queryAttribute(clusterAtom))));
+        numPersistInstances = multiplePersistInstances ? -1 : 0;
+        if (expr->hasAttribute(multipleAtom))
+            numPersistInstances = getIntValue(queryAttributeChild(expr, multipleAtom, 0), -1);
+        else if (expr->hasAttribute(singleAtom))
+            numPersistInstances = 0;
+
+        if (numPersistInstances != 0)
+        {
+            StringBuffer s;
+            getStringValue(s, storedName);
+            s.append("__");
+            getStringValue(s, codehash);
+            storedName.setown(createConstant(s.str()));
+        }
         break;
     case no_global:
         throwUnexpected();
@@ -5256,6 +5283,7 @@ WorkflowTransformer::WorkflowTransformer(IWorkUnit * _wu, HqlCppTranslator & _tr
     combineAllStored = translator.queryOptions().combineAllStored;
     combineTrivialStored = translator.queryOptions().combineTrivialStored;
     expandPersistInputDependencies = translator.queryOptions().expandPersistInputDependencies;
+    multiplePersistInstances = translator.queryOptions().multiplePersistInstances;
     isRootAction = true;
     isRoxie = (translator.getTargetClusterType() == RoxieCluster);
     workflowOut = NULL;
@@ -5302,9 +5330,9 @@ void WorkflowTransformer::setWorkflowSchedule(IWorkflowItem * wf, const Schedule
     wf->setSchedulePriority(priority);
 }
 
-void WorkflowTransformer::setWorkflowPersist(IWorkflowItem * wf, char const * persistName, unsigned persistWfid)
+void WorkflowTransformer::setWorkflowPersist(IWorkflowItem * wf, char const * persistName, unsigned persistWfid, int numPersistInstances)
 {
-    wf->setPersistInfo(persistName, persistWfid);
+    wf->setPersistInfo(persistName, persistWfid, numPersistInstances);
 }
 
 WorkflowItem * WorkflowTransformer::createWorkflowItem(IHqlExpression * expr, unsigned wfid, node_operator workflowOp)
@@ -5441,8 +5469,13 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
     HqlExprArray actions;
     unwindChildren(actions, expr, 1);
 
-    IHqlExpression * original = queryAttribute(_original_Atom, actions);
-    if (original) original = original->queryChild(0);
+    IHqlExpression * originalAttr = queryAttribute(_original_Atom, actions);
+    OwnedHqlExpr codehash;
+    if (originalAttr)
+    {
+        unsigned crc = getExpressionCRC(originalAttr->queryChild(0)) + PERSIST_VERSION;
+        codehash.setown(getSizetConstant(crc));
+    }
 
     //First check for duplicate expressions, and cope with the weird case where they are identical except for the annotations.
     //Do it before wfid is allocated to make life simpler
@@ -5455,7 +5488,7 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
         case no_persist:
         case no_checkpoint:
         case no_stored:
-            info.extractStoredInfo(&cur, original, isRoxie);
+            info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
 
             OwnedHqlExpr id = info.getStoredKey();
             unsigned match = alreadyProcessed.find(*id);
@@ -5521,7 +5554,7 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
         case no_checkpoint:
         case no_stored:
             {
-                info.extractStoredInfo(&cur, original, isRoxie);
+                info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
 
                 OwnedHqlExpr id = info.getStoredKey();
                 alreadyProcessed.append(*id.getClear());
@@ -5531,14 +5564,14 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
             break;
         case no_independent:
         case no_once:
-            info.extractStoredInfo(&cur, original, isRoxie);
+            info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
             break;
         case no_success:
             {
                 OwnedHqlExpr successExpr = transformSequentialEtc(cur.queryChild(0));
                 conts.success = splitValue(successExpr);
                 Owned<IWorkflowItem> wf = addWorkflowContingencyToWorkunit(conts.success, WFTypeSuccess, WFModeNormal, queryDirectDependencies(successExpr), NULL, wfid);
-                info.extractStoredInfo(&cur, original, isRoxie);
+                info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
                 break;
             }
         case no_failure:
@@ -5546,7 +5579,7 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
                 OwnedHqlExpr failureExpr = transformSequentialEtc(cur.queryChild(0));
                 conts.failure = splitValue(failureExpr);
                 Owned<IWorkflowItem> wf = addWorkflowContingencyToWorkunit(conts.failure, WFTypeFailure, WFModeNormal, queryDirectDependencies(failureExpr), NULL, wfid);
-                info.extractStoredInfo(&cur, original, isRoxie);
+                info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
                 break;
             }
         case no_recovery:
@@ -5554,7 +5587,7 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
             conts.recovery = splitValue(cur.queryChild(0));
             conts.retries = (unsigned)getIntValue(cur.queryChild(1), 0);
                 Owned<IWorkflowItem> wf = addWorkflowContingencyToWorkunit(conts.recovery, WFTypeRecovery, WFModeNormal, queryDirectDependencies(cur.queryChild(0)), NULL, wfid);
-            info.extractStoredInfo(&cur, original, isRoxie);
+            info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
             break;
             }
         case no_attr:
@@ -5685,7 +5718,7 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
             info.storedName->queryValue()->getStringValue(persistName);
             unsigned persistWfid = ++wfidCount;
             Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModePersist, queryDirectDependencies(setValue), conts, info.queryCluster());
-            setWorkflowPersist(wf, persistName.str(), persistWfid);
+            setWorkflowPersist(wf, persistName.str(), persistWfid, info.queryMaxPersistCopies());
 
             Owned<IWorkflowItem> wfPersist = addWorkflowToWorkunit(persistWfid, WFTypeNormal, WFModeNormal, NULL);
             DependenciesUsed dependencies(false);
@@ -5698,7 +5731,7 @@ IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransfo
             checkArgs.append(*createExprAttribute(_files_Atom, dependencies.tablesRead));
             if (dependencies.resultsRead.ordinality())
                 checkArgs.append(*createExprAttribute(_results_Atom, dependencies.resultsRead));
-            checkArgs.append(*createAttribute(_original_Atom, LINK(original)));
+            checkArgs.append(*createAttribute(_codehash_Atom, LINK(codehash)));
             checkArgs.append(*createAttribute(namedAtom, LINK(info.storedName)));
             if (expr->isDataset())
                 checkArgs.append(*createAttribute(fileAtom));

+ 2 - 1
ecl/hqlcpp/hqlttcpp.ipp

@@ -455,7 +455,7 @@ protected:
     IWorkflowItem *           addWorkflowToWorkunit(unsigned wfid, WFType type, WFMode mode, UnsignedArray const & dependencies, ContingencyData const & conts, IHqlExpression * cluster);
     IWorkflowItem *           addWorkflowContingencyToWorkunit(unsigned wfid, WFType type, WFMode mode, UnsignedArray const & dependencies, IHqlExpression * cluster, unsigned wfidFor) { ContingencyData conts; conts.contingencyFor = wfidFor; return addWorkflowToWorkunit(wfid, type, mode, dependencies, conts, cluster); }
 
-    void setWorkflowPersist(IWorkflowItem * wf, char const * persistName, unsigned persistWfid);
+    void setWorkflowPersist(IWorkflowItem * wf, char const * persistName, unsigned persistWfid, int  numPersistInstances);
     void setWorkflowSchedule(IWorkflowItem * wf, ScheduleData const & sched);
 
     virtual IHqlExpression *  createTransformed(IHqlExpression * expr);
@@ -534,6 +534,7 @@ protected:
     bool                      isRootAction;
     bool                      isRoxie;
     bool                      expandPersistInputDependencies;
+    bool                      multiplePersistInstances;
     UnsignedArray             cumulativeDependencies;
     UnsignedArray             emptyDependencies;
     UnsignedArray             storedWfids;

+ 13 - 0
ecl/regress/issue10022a.ecl

@@ -0,0 +1,13 @@
+#option ('multiplePersistInstances', false);
+
+ds := dataset('x', { string line }, FLAT);
+
+p1 := ds(line <> '') : persist('p1', many, single);
+
+p2 := ds(line = '') : persist('p2', multiple);
+
+p3 := ds(line[1]='!') : persist('p3','10Mway',multiple(10));
+
+p4 := ds(line[1] = ' ') : persist('p4');
+
+output(p1 + p2 + p3 + p4);

+ 13 - 0
ecl/regress/issue10022b.ecl

@@ -0,0 +1,13 @@
+#option ('multiplePersistInstances', true);
+
+ds := dataset('x', { string line }, FLAT);
+
+p1 := ds(line <> '') : persist('p1', many, single);
+
+p2 := ds(line = '') : persist('p2', multiple);
+
+p3 := ds(line[1]='!') : persist('p3','10Mway',multiple(10));
+
+p4 := ds(line[1] = ' ') : persist('p4');
+
+output(p1 + p2 + p3 + p4);

+ 16 - 0
testing/ecl/issue10022.ecl

@@ -0,0 +1,16 @@
+//noroxie
+//this really should be supported by roxie, but it doesn't support it at the moment
+
+#option ('multiplePersistInstances', true);
+
+ds := dataset(['','!Hello',' me ', '', 'xx', '!end'], { string line });
+
+p1 := ds(line <> '') : persist('~p1', many, single);
+
+p2 := ds(line = '') : persist('~p2', multiple);
+
+p3 := ds(line[1]='!') : persist('~p3','10Mway',multiple(10));
+
+p4 := ds(line[1] = ' ') : persist('~p4');
+
+output(p1 & p2 & p3 & p4);

+ 13 - 0
testing/ecl/key/issue10022.xml

@@ -0,0 +1,13 @@
+<Dataset name='Result 1'>
+ <Row><line>!Hello</line></Row>
+ <Row><line> me </line></Row>
+ <Row><line>xx</line></Row>
+ <Row><line>!end</line></Row>
+ <Row><line></line></Row>
+ <Row><line></line></Row>
+ <Row><line>!Hello</line></Row>
+ <Row><line>!end</line></Row>
+ <Row><line></line></Row>
+ <Row><line> me </line></Row>
+ <Row><line></line></Row>
+</Dataset>