Browse Source

Merge remote-tracking branch 'origin/closedown-4.2.x'

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 11 years ago
parent
commit
2fde1e442d

+ 4 - 4
common/workunit/workunit.cpp

@@ -667,7 +667,7 @@ public:
     virtual unsigned __int64 getHash() const;
     virtual IStringIterator *getLogs(const char *type, const char *component) const;
     virtual IStringIterator *getProcesses(const char *type) const;
-    virtual IPropertyTreeIterator& getProcesses(const char *type, const char *instance) const;
+    virtual IPropertyTreeIterator* getProcesses(const char *type, const char *instance) const;
 
     virtual bool getWuDate(unsigned & year, unsigned & month, unsigned& day);
     virtual IStringVal & getSnapshot(IStringVal & str) const;
@@ -1140,7 +1140,7 @@ public:
             { return c->getLogs(type, instance); }
     virtual IStringIterator *getProcesses(const char *type) const
             { return c->getProcesses(type); }
-    virtual IPropertyTreeIterator& getProcesses(const char *type, const char *instance) const
+    virtual IPropertyTreeIterator* getProcesses(const char *type, const char *instance) const
             { return c->getProcesses(type, instance); }
 
     virtual void clearExceptions()
@@ -5277,7 +5277,7 @@ IStringIterator *CLocalWorkUnit::getLogs(const char *type, const char *instance)
         return new CStringPTreeAttrIterator(p->getElements(xpath.str()), "@log");
 }
 
-IPropertyTreeIterator& CLocalWorkUnit::getProcesses(const char *type, const char *instance) const
+IPropertyTreeIterator* CLocalWorkUnit::getProcesses(const char *type, const char *instance) const
 {
     VStringBuffer xpath("Process/%s/", type);
     if (instance)
@@ -5285,7 +5285,7 @@ IPropertyTreeIterator& CLocalWorkUnit::getProcesses(const char *type, const char
     else
         xpath.append("*");
     CriticalBlock block(crit);
-    return * p->getElements(xpath.str());
+    return p->getElements(xpath.str());
 }
 
 IStringIterator *CLocalWorkUnit::getProcesses(const char *type) const

+ 1 - 1
common/workunit/workunit.hpp

@@ -945,7 +945,7 @@ interface IConstWorkUnit : extends IInterface
     virtual unsigned __int64 getHash() const = 0;
     virtual IStringIterator *getLogs(const char *type, const char *instance=NULL) const = 0;
     virtual IStringIterator *getProcesses(const char *type) const = 0;
-    virtual IPropertyTreeIterator& getProcesses(const char *type, const char *instance) const = 0;
+    virtual IPropertyTreeIterator* getProcesses(const char *type, const char *instance) const = 0;
 };
 
 

+ 22 - 4
dali/base/dadfs.cpp

@@ -4473,8 +4473,11 @@ class CDistributedSuperFile: public CDistributedFileBase<IDistributedSuperFile>
                     {
                         sf->loadSubFiles(transaction, SDS_TRANSACTION_RETRY);
                         // potentially subfile _was_ a subfile, but isn't anymore, after dirty update
-                        if (!transaction->isSubFile(parent, subfile, true))
-                            WARNLOG("addSubFile: File %s is not a subfile of %s", subfile.get(), parent->queryLogicalName());
+                        if (!subfile.isEmpty())
+                        {
+                            if (!transaction->isSubFile(parent, subfile, true))
+                                WARNLOG("addSubFile: File %s is not a subfile of %s", subfile.get(), parent->queryLogicalName());
+                        }
                     }
                 }
                 if (sub)
@@ -6529,6 +6532,7 @@ public:
 // --------------------------------------------------------
 
 #define GROUP_CACHE_INTERVAL (1000*60)
+#define GROUP_EXCEPTION_CACHE_INTERVAL (1000*60*10)
 
 static GroupType translateGroupType(const char *groupType)
 {
@@ -6550,7 +6554,6 @@ public:
     Linked<IGroup> group;
     StringAttr name;
     StringAttr groupDir;
-    unsigned cachedtime;
     GroupType groupType;
     Linked<IException> exception;
 
@@ -6565,6 +6568,16 @@ public:
     {
         cachedtime = msTick();
     }
+
+    bool expired(unsigned timeNow)
+    {
+        if (exception)
+            return timeNow-cachedtime > GROUP_EXCEPTION_CACHE_INTERVAL;
+        else
+            return timeNow-cachedtime > GROUP_CACHE_INTERVAL;
+    }
+protected:
+    unsigned cachedtime;
 };
 
 class CNamedGroupStore: public CInterface, implements INamedGroupStore
@@ -6645,7 +6658,7 @@ public:
             ForEachItemInRev(idx, cache)
             {
                 CNamedGroupCacheEntry &entry = cache.item(idx);
-                if (timeNow-entry.cachedtime > GROUP_CACHE_INTERVAL)
+                if (entry.expired(timeNow))
                 {
                     cache.remove(idx);
                 }
@@ -6971,6 +6984,11 @@ public:
         return ret;
     }
 
+    void resetCache()
+    {
+        CriticalBlock block(cachesect);
+        cache.kill();
+    }
 private:
     bool getRemoteGroup(const INode *foreigndali, const char *gname, unsigned foreigndalitimeout,
                            StringAttr &groupdir, GroupType &type, SocketEndpointArray &epa)

+ 1 - 0
dali/base/dadfs.hpp

@@ -587,6 +587,7 @@ interface INamedGroupStore: implements IGroupResolver
     virtual IGroup *lookup(const char *logicalgroupname, StringBuffer &dir, GroupType &groupType) = 0;
     virtual unsigned setDefaultTimeout(unsigned timems) = 0;     // sets default timeout for SDS connections and locking
     virtual unsigned setRemoteTimeout(unsigned timems) = 0;      // sets default timeout for remote SDS connections and locking
+    virtual void resetCache() = 0;      // resets any cached lookups
 };
 
 extern da_decl INamedGroupStore  &queryNamedGroupStore();

+ 111 - 25
dali/base/dasess.cpp

@@ -227,7 +227,7 @@ public:
         return true;
     }
 
-    const CSessionState *query(SessionId id)
+    CSessionState *query(SessionId id)
     {
         CHECKEDCRITICALBLOCK(sessstatesect,60000);
         return SuperHashTableOf<CSessionState,SessionId>::find(&id);
@@ -245,6 +245,7 @@ class CProcessSessionState: public CSessionState
 {
     INode *node;
     DaliClientRole role;
+    UInt64Array previousSessionIds;
 public:
     CProcessSessionState(SessionId id,INode *_node,DaliClientRole _role)
         : CSessionState(id)
@@ -261,17 +262,42 @@ public:
     {
         return *node;
     }
-
     DaliClientRole queryRole() const
     {
         return role;
     }
-
     StringBuffer &getDetails(StringBuffer &buf)
     {
         StringBuffer ep;
         return buf.appendf("%16"I64F"X: %s, role=%s",CSessionState::id,node->endpoint().getUrlStr(ep).str(),queryRoleName(role));
     }
+    void addSessionIds(CProcessSessionState &other, bool prevOnly)
+    {
+        loop
+        {
+            SessionId id = other.dequeuePreviousSessionId();
+            if (!id)
+                break;
+            previousSessionIds.append(id);
+        }
+        if (!prevOnly)
+            previousSessionIds.append(other.getId());
+    }
+    SessionId dequeuePreviousSessionId()
+    {
+        if (!previousSessionIds.ordinality())
+            return 0;
+        return previousSessionIds.pop();
+    }
+    unsigned previousSessionIdCount() const
+    {
+        return previousSessionIds.ordinality();
+    }
+    void removeOldSessionId(SessionId id)
+    {
+        if (previousSessionIds.zap(id))
+            PROGLOG("Removed old sessionId (%"I64F"x) from current process state", id);
+    }
 };
 
 class CMapProcessToSession: private SuperHashTableOf<CProcessSessionState,INode>
@@ -326,21 +352,28 @@ public:
         return true;
     }
 
+    void replace(CProcessSessionState *e)
+    {
+        CHECKEDCRITICALBLOCK(mapprocesssect,60000);
+        SuperHashTableOf<CProcessSessionState,INode>::replace(*e);
+    }
+
     CProcessSessionState *query(INode *n) 
     {
         CHECKEDCRITICALBLOCK(mapprocesssect,60000);
         return SuperHashTableOf<CProcessSessionState,INode>::find(n);
     }
     
-    void remove(INode *n,ISessionManagerServer *manager)
+    bool remove(const CProcessSessionState *state, ISessionManagerServer *manager)
     {
         CHECKEDCRITICALBLOCK(mapprocesssect,60000);
-        CProcessSessionState *sstate = SuperHashTableOf<CProcessSessionState,INode>::find(n);
-        if (sstate) {
+        if (SuperHashTableOf<CProcessSessionState,INode>::removeExact((CProcessSessionState *)state))
+        {
             if (manager)
-                manager->authorizeConnection(sstate->queryRole(),true);
-            SuperHashTableOf<CProcessSessionState,INode>::removeExact(sstate);
+                manager->authorizeConnection(state->queryRole(), true);
+            return true;
         }
+        return false;
     }
 
     unsigned count()
@@ -1211,7 +1244,6 @@ class CCovenSessionManager: public CSessionManagerBase, implements ISessionManag
         // no fail currently
     }
 
-
 public:
     IMPLEMENT_IINTERFACE;
 
@@ -1279,13 +1311,22 @@ public:
         PROGLOG("Session starting %"I64F"x (%s) : role=%s",id,client->endpoint().getUrlStr(str).str(),queryRoleName(role));
         CHECKEDCRITICALBLOCK(sessmanagersect,60000);
         CProcessSessionState *s = new CProcessSessionState(id,client,role);
-        while (!sessionstates.add(s)) { // takes ownership
+        while (!sessionstates.add(s)) // takes ownership
+        {
             WARNLOG("Dali session manager: session already registered");
             sessionstates.remove(id);
         }
-        while (!processlookup.add(s)) {
-            ERRLOG("Dali session manager: registerClient process session already registered");
-            processlookup.remove(client,this);
+        while (!processlookup.add(s))
+        {
+            /* There's existing ip:port match (client) in process table..
+             * Old may be in process of closing or about to, but new has beaten the onClose() to it..
+             * Track old sessions in new CProcessSessionState instance, so that upcoming onClose() can find them
+             */
+            CProcessSessionState *previousState = processlookup.query(client);
+            dbgassertex(previousState); // Must be there, it's reason add() failed
+            s->addSessionIds(*previousState, false); // merges sessions from previous process state into new one that replaces it
+            WARNLOG("Dali session manager: registerClient process session already registered, old replaced");
+            processlookup.remove(previousState, this);
         }
     }
 
@@ -1613,7 +1654,7 @@ protected:
     {
         PROGLOG("Session stopping %"I64F"x %s",id,abort?"aborted":"ok");
         CHECKEDCRITICALBLOCK(sessmanagersect,60000);
-        // do in multiple stages as may remove one or more sub sussions
+        // do in multiple stages as may remove one or more sub sessions
         loop
         {
             CIArrayOf<CSessionSubscriptionStub> tonotify;
@@ -1635,7 +1676,7 @@ protected:
             ForEachItemIn(j2,tonotify)
             {
                 CSessionSubscriptionStub &stub = tonotify.item(j2);
-                try { stub.notify(abort);}
+                try { stub.notify(abort); }
                 catch (IException *e) { e->Release(); } // subscriber session may abort during stopSession
             }
             tonotify.kill(); // clear whilst sessmanagersect unblocked, as subs may query session manager.
@@ -1643,31 +1684,76 @@ protected:
         const CSessionState *state = sessionstates.query(id);
         if (state)
         {
-            const CProcessSessionState *pstate = QUERYINTERFACE(state,const CProcessSessionState);
-            if (pstate) 
-                processlookup.remove(&pstate->queryNode(),this);
+            const CProcessSessionState *pState = QUERYINTERFACE(state, const CProcessSessionState);
+            if (pState)
+            {
+                CProcessSessionState *cState = processlookup.query(&pState->queryNode()); // get current
+                if (pState == cState) // so is current one.
+                {
+                    /* This is reinstating a previous CProcessSessionState for this node (if there is one),
+                     * that has not yet stopped, and adding any other pending process states to the CProcessSessionState
+                     * being reinstated.
+                     */
+                    SessionId prevId = cState->dequeuePreviousSessionId();
+                    if (prevId)
+                    {
+                        CSessionState *prevSessionState = sessionstates.query(prevId);
+                        dbgassertex(prevSessionState); // must be there
+                        CProcessSessionState *prevProcessState = QUERYINTERFACE(prevSessionState, CProcessSessionState);
+                        dbgassertex(prevSessionState);
+                        /* NB: prevProcessState's have 0 entries in their previousSessionIds, since they were merged at replacement time
+                         * in addProcessSession()
+                         */
+                        prevProcessState->addSessionIds(*cState, true); // add in any remaining
+                        processlookup.replace(prevProcessState);
+                    }
+                    else
+                        processlookup.remove(pState, this);
+                }
+                else
+                {
+                    if (processlookup.remove(pState, this)) // old may have been removed when replaced
+                    {
+                        if (cState)
+                        {
+                            PROGLOG("Session (%"I64F"x) was replaced, ensuring removed from new process state", id);
+                            cState->removeOldSessionId(id); // If already replaced, then must ensure no longer tracked by new
+                        }
+                    }
+                }
+            }
             sessionstates.remove(id);
         }
     }
 
     void onClose(SocketEndpoint &ep)
     {
-        StringBuffer str;
-        PROGLOG("Client closed (%s)",ep.getUrlStr(str).str());
+        StringBuffer clientStr;
+        PROGLOG("Client closed (%s)", ep.getUrlStr(clientStr).str());
+
         SessionId idtostop;
         {
             CHECKEDCRITICALBLOCK(sessmanagersect,60000);
             Owned<INode> node = createINode(ep);
-            if (queryCoven().inCoven(node)) {
-                StringBuffer str;
-                PROGLOG("Coven Session Stopping (%s)",ep.getUrlStr(str).str());
+            if (queryCoven().inCoven(node))
+            {
+                PROGLOG("Coven Session Stopping (%s)", clientStr.str());
                 // more TBD here
                 return;
             }
-            CProcessSessionState *s= processlookup.query(node);
+            CProcessSessionState *s = processlookup.query(node);
             if (!s)
                 return;
-            idtostop = s->getId();
+            idtostop = s->dequeuePreviousSessionId();
+            if (idtostop)
+            {
+                PROGLOG("Previous sessionId (%"I64F"x) for %s was replaced by (%"I64F"x), stopping old session now", idtostop, clientStr.str(), s->getId());
+                unsigned c = s->previousSessionIdCount();
+                if (c) // very unlikely, but could be >1, trace for info.
+                    PROGLOG("%d old sessions pending closure", c);
+            }
+            else
+                idtostop = s->getId();
         }
         stopSession(idtostop,true);
     }

+ 11 - 3
docs/ECLLanguageReference/ECLR-includer.xml

@@ -138,9 +138,17 @@
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
   </chapter>
 
-  <chapter id="Value_Types_Chapter">
+  <chapter id="Value_Types">
     <title><emphasis role="bold">Value Types</emphasis></title>
 
+    <para>Value types<indexterm>
+        <primary>Value Types</primary>
+      </indexterm> declare an Attribute's type when placed left of the
+    Attribute name in the definition. They also declare a passed parameter's
+    type when placed left of the parameter name in the definition. Value types
+    also explicitly cast from type to another when placed in parentheses left
+    of the expression to cast.</para>
+
     <xi:include href="ECLLanguageReference/ECLR_mods/Value-Boolean.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
@@ -866,9 +874,9 @@
     <xi:include href="ECLLanguageReference/ECLR_mods/BltInFunc-TOUNICODE.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
-   
+
     <xi:include href="ECLLanguageReference/ECLR_mods/BltInFunc-TOXML.xml"
-                   xpointer="element(/1)"
+                xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
     <xi:include href="ECLLanguageReference/ECLR_mods/BltInFunc-TRANSFER.xml"

+ 5 - 2
docs/ECLLanguageReference/ECLR_mods/BltInFunc-EVENTEXTRA.xml

@@ -19,7 +19,7 @@
           <row>
             <entry>Return:</entry>
 
-            <entry>EVENTNAME returns a single string value.</entry>
+            <entry>EVENTEXTRA returns a single string value.</entry>
           </row>
         </tbody>
       </tgroup>
@@ -49,5 +49,8 @@ WAIT('MyServiceComplete');
 OUTPUT('WORKUNIT DONE')
 </programlisting>
 
-  <para>See Also: <link linkend="EVENT">EVENT</link>, <link linkend="EVENTNAME">EVENTNAME</link>, <link linkend="CRON">CRON</link>, <link linkend="WHEN">WHEN</link>, <link linkend="WAIT">WAIT</link>, <link linkend="NOTIFY">NOTIFY</link></para>
+  <para>See Also: <link linkend="EVENT">EVENT</link>, <link
+  linkend="EVENTNAME">EVENTNAME</link>, <link linkend="CRON">CRON</link>,
+  <link linkend="WHEN">WHEN</link>, <link linkend="WAIT">WAIT</link>, <link
+  linkend="NOTIFY">NOTIFY</link></para>
 </sect1>

+ 23 - 35
docs/ECLLanguageReference/ECLR_mods/Value-Boolean.xml

@@ -1,39 +1,28 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<sect1 id="Value_Types" role="nobrk">
-  <title>Value Types<indexterm>
-      <primary>Value Types</primary>
-    </indexterm></title>
-
-  <para>Value types declare an Attribute's type when placed left of the
-  Attribute name in the definition. They also declare a passed parameter's
-  type when placed left of the parameter name in the definition. Value types
-  also explicitly cast from type to another when placed in parentheses left of
-  the expression to cast.</para>
-
-  <sect2 id="BOOLEAN">
-    <title>BOOLEAN</title>
-
-    <para><emphasis role="bold">BOOLEAN<indexterm>
-        <primary>BOOLEAN</primary>
-      </indexterm><indexterm>
-        <primary>BOOLEAN value type</primary>
-      </indexterm></emphasis></para>
-
-    <para>A Boolean true/false value. <emphasis
-    role="bold">TRUE</emphasis><indexterm>
-        <primary>TRUE</primary>
-      </indexterm> and <emphasis role="bold">FALSE<indexterm>
-        <primary>FALSE</primary>
-      </indexterm></emphasis> are reserved ECL keywords; they are Boolean
-    constants that may be used to compare against a BOOLEAN type. When BOOLEAN
-    is used in a RECORD structure, a single-byte integer containing one (1) or
-    zero (0) is output.</para>
-
-    <para>Example:</para>
-
-    <programlisting>BOOLEAN MyBoolean := SomeAttribute &gt; 10;
+<sect1 id="BOOLEAN" role="nobrk">
+  <title>BOOLEAN</title>
+
+  <para><emphasis role="bold">BOOLEAN<indexterm>
+      <primary>BOOLEAN</primary>
+    </indexterm><indexterm>
+      <primary>BOOLEAN value type</primary>
+    </indexterm></emphasis></para>
+
+  <para>A Boolean true/false value. <emphasis
+  role="bold">TRUE</emphasis><indexterm>
+      <primary>TRUE</primary>
+    </indexterm> and <emphasis role="bold">FALSE<indexterm>
+      <primary>FALSE</primary>
+    </indexterm></emphasis> are reserved ECL keywords; they are Boolean
+  constants that may be used to compare against a BOOLEAN type. When BOOLEAN
+  is used in a RECORD structure, a single-byte integer containing one (1) or
+  zero (0) is output.</para>
+
+  <para>Example:</para>
+
+  <programlisting>BOOLEAN MyBoolean := SomeAttribute &gt; 10;
         // declares MyBoolean a BOOLEAN Attribute
      
 BOOLEAN MyBoolean(INTEGER p) := p &gt; 10;
@@ -42,6 +31,5 @@ BOOLEAN MyBoolean(INTEGER p) := p &gt; 10;
 BOOLEAN Typtrd := trades.trd_type = 'R';
         // Typtrd is a Boolean attribute, likely to be used as a filter</programlisting>
 
-    <para>See Also: <link linkend="TRUE_FALSE">TRUE/FALSE</link></para>
-  </sect2>
+  <para>See Also: <link linkend="TRUE_FALSE">TRUE/FALSE</link></para>
 </sect1>

+ 1 - 1
docs/ECLStandardLibraryReference/SLR-Mods/AddSuperFile.xml

@@ -34,7 +34,7 @@
           <entry><emphasis>subfile</emphasis></entry>
 
           <entry>A null-terminated string containing the logical name of the
-          sub-file. Ths may be another superfile.</entry>
+          sub-file. This may be another superfile.</entry>
         </row>
 
         <row>

+ 53 - 0
docs/ECLStandardLibraryReference/SLR-Mods/RemoveOwnedSubFiles.xml

@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<sect1 id="RemoveOwnedSubFiles">
+  <title>RemoveOwnedSubFiles</title>
+
+  <para><emphasis role="bold">STD.File.RemoveOwnedSubFiles<indexterm>
+      <primary>STD.File.RemoveOwnedSubFiles</primary>
+    </indexterm><indexterm>
+      <primary>File.RemoveOwnedSubFiles</primary>
+    </indexterm><indexterm>
+      <primary>RemoveOwnedSubFiles</primary>
+    </indexterm>(</emphasis> <emphasis> superfile</emphasis><emphasis
+  role="bold">)</emphasis></para>
+
+  <informaltable colsep="1" frame="all" rowsep="1">
+    <tgroup cols="2">
+      <colspec colwidth="80.50pt" />
+
+      <colspec />
+
+      <tbody>
+        <row>
+          <entry><emphasis>superfile</emphasis></entry>
+
+          <entry>A null-terminated string containing the logical name of the
+          superfile.</entry>
+        </row>
+
+        <row>
+          <entry>Return:<emphasis> </emphasis></entry>
+
+          <entry>Null.</entry>
+        </row>
+      </tbody>
+    </tgroup>
+  </informaltable>
+
+  <para>The <emphasis role="bold">RemoveOwnedSubFiles </emphasis>function
+  removes all owned sub-files from the specified superfile. These are only
+  removed if they are soley owned by the superfile. If a subfile is co-owned,
+  (i.e.,a member of any other superfile), then the removal is ignored.</para>
+
+  <para>This function may be included in a superfile transaction.</para>
+
+  <para>Example:</para>
+
+  <programlisting format="linespecific">SEQUENTIAL(
+ STD.File.StartSuperFileTransaction(),
+ STD.File.RemoveOwnedSubFiles('MySuperFile'),
+ STD.File.FinishSuperFileTransaction()
+);</programlisting>
+</sect1>

+ 3 - 0
docs/ECLStandardLibraryReference/SLR-includer.xml

@@ -137,6 +137,9 @@
     <xi:include href="ECLStandardLibraryReference/SLR-Mods/ClearSuperFile.xml"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
+    <xi:include href="ECLStandardLibraryReference/SLR-Mods/RemoveOwnedSubFiles.xml"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
     <xi:include href="ECLStandardLibraryReference/SLR-Mods/SwapSuperFile.xml"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 

+ 2 - 2
docs/HPCCClientTools/CT_Mods/CT_ECL_CLI.xml

@@ -2315,7 +2315,7 @@ ecl packagemap validate roxie --active</programlisting>
       <sect2 role="brk">
         <title>ecl roxie detach</title>
 
-        <para><emphasis role="bold">ecl roxie attach
+        <para><emphasis role="bold">ecl roxie detach
         &lt;processName&gt;</emphasis></para>
 
         <para>Examples:</para>
@@ -2333,7 +2333,7 @@ ecl packagemap validate roxie --active</programlisting>
 
               <tbody>
                 <row>
-                  <entry>ecl roxie attach</entry>
+                  <entry>ecl roxie detach</entry>
 
                   <entry>Detach the roxie from Dali</entry>
                 </row>

+ 12 - 7
esp/services/ws_workunits/ws_workunitsHelpers.cpp

@@ -565,11 +565,12 @@ void WsWuInfo::getHelpers(IEspECLWorkunit &info, unsigned flags)
 
         if (cw->getWuidVersion() > 0)
         {
-            IPropertyTreeIterator& eclAgents = cw->getProcesses("EclAgent", NULL);
-            ForEach (eclAgents)
+            Owned<IPropertyTreeIterator> eclAgents = cw->getProcesses("EclAgent", NULL);
+            ForEach (*eclAgents)
             {
-                StringBuffer logName, agentPID;
-                eclAgents.query().getProp("@log",logName);
+                StringBuffer logName;
+                IPropertyTree& eclAgent = eclAgents->query();
+                eclAgent.getProp("@log",logName);
                 if (!logName.length())
                     continue;
 
@@ -583,8 +584,8 @@ void WsWuInfo::getHelpers(IEspECLWorkunit &info, unsigned flags)
                         h->setFileSize(fileSize);
                     if (version >= 1.44)
                     {
-                        if (eclAgents.query().hasProp("@pid"))
-                            h->setPID(eclAgents.query().getPropInt("@pid"));
+                        if (eclAgent.hasProp("@pid"))
+                            h->setPID(eclAgent.getPropInt("@pid"));
                         else
                             h->setPID(cw->getAgentPID());
                     }
@@ -953,7 +954,11 @@ unsigned WsWuInfo::getWorkunitThorLogInfo(IArrayOf<IEspECLHelpFile>& helpers, IE
     if (cw->getWuidVersion() > 0)
     {
         SCMStringBuffer clusterName;
-        Owned<IConstWUClusterInfo> clusterInfo = getTargetClusterInfo(cw->getClusterName(clusterName).str());
+        cw->getClusterName(clusterName);
+        if (!clusterName.length()) //Cluster name may not be set yet
+            return countThorLog;
+
+        Owned<IConstWUClusterInfo> clusterInfo = getTargetClusterInfo(clusterName.str());
         if (!clusterInfo)
         {
             SCMStringBuffer wuid;

+ 5 - 4
esp/services/ws_workunits/ws_workunitsQuerySets.cpp

@@ -32,6 +32,7 @@
 
 const unsigned ROXIECONNECTIONTIMEOUT = 1000;   //1 second
 const unsigned ROXIECONTROLQUERYTIMEOUT = 3000; //3 second
+const unsigned ROXIELOCKCONNECTIONTIMEOUT = 60000; //60 second
 
 #define SDS_LOCK_TIMEOUT (5*60*1000) // 5mins, 30s a bit short
 
@@ -1628,9 +1629,8 @@ void CWsWorkunitsEx::getGraphsByQueryId(const char *target, const char *queryId,
     if (eps.empty())
         return;
 
-    VStringBuffer xpath("<control:querystats><Query id='%s'/></control:querystats>", queryId);
-    Owned<ISocket> sock = ISocket::connect_timeout(eps.item(0), ROXIECONNECTIONTIMEOUT);
-    Owned<IPropertyTree> querystats = sendRoxieControlQuery(sock, xpath.str(), ROXIECONTROLQUERYTIMEOUT);
+    VStringBuffer control("<control:querystats><Query id='%s'/></control:querystats>", queryId);
+    Owned<IPropertyTree> querystats = sendRoxieControlAllNodes(eps.item(0), control.str(), false, ROXIELOCKCONNECTIONTIMEOUT);
     if (!querystats)
         return;
 
@@ -1642,7 +1642,7 @@ void CWsWorkunitsEx::getGraphsByQueryId(const char *target, const char *queryId,
         if (graphId && *graphId && !strieq(graphId, aGraphId))
             continue;
 
-        IPropertyTree* xgmml = graph.getBranch("xgmml/graph/graph");
+        IPropertyTree* xgmml = graph.getBranch("xgmml/graph");
         if (!xgmml)
             continue;
 
@@ -1657,6 +1657,7 @@ void CWsWorkunitsEx::getGraphsByQueryId(const char *target, const char *queryId,
             VStringBuffer xpath("//node[@id='%s']", subGraphId);
             toXML(xgmml->queryPropTree(xpath.str()), xml);
         }
+
         g->setGraph(xml.str());
         ECLGraphs.append(*g.getClear());
     }

+ 39 - 9
esp/services/ws_workunits/ws_workunitsService.cpp

@@ -3915,15 +3915,16 @@ bool CWsWorkunitsEx::onWUDeployWorkunit(IEspContext &context, IEspWUDeployWorkun
 
 void CWsWorkunitsEx::addProcessLogfile(IZZIPor* zipper, Owned<IConstWorkUnit> &cwu, WsWuInfo &winfo, const char * process, PointerArray &mbArr)
 {
-    IPropertyTreeIterator& proc = cwu->getProcesses(process, NULL);
-    ForEach (proc)
+    Owned<IPropertyTreeIterator> procs = cwu->getProcesses(process, NULL);
+    ForEach (*procs)
     {
         StringBuffer logSpec;
-        proc.query().getProp("@log",logSpec);
+        IPropertyTree& proc = procs->query();
+        proc.getProp("@log",logSpec);
         if (!logSpec.length())
             continue;
         StringBuffer pid;
-        pid.appendf("%d",proc.query().getPropInt("@pid"));
+        pid.appendf("%d",proc.getPropInt("@pid"));
         MemoryBuffer * pMB = NULL;
         try
         {
@@ -4035,16 +4036,45 @@ bool CWsWorkunitsEx::onWUCreateZAPInfo(IEspContext &context, IEspWUCreateZAPInfo
 
             //add ECL query/archive to zip
             Owned<IConstWUQuery> query = cwu->getQuery();
-            StringBuffer ecl;//String buffers containing file contents must persist until ziptofile is called !
+            StringBuffer eclContents;//String buffers containing file contents must persist until ziptofile is called !
+            StringBuffer archiveContents;//String buffers containing file contents must persist until ziptofile is called !
             if(query)
             {
+                //Add archive if present
+                Owned<IConstWUAssociatedFileIterator> iter = &query->getAssociatedFiles();
+                ForEach(*iter)
+                {
+                    IConstWUAssociatedFile & cur = iter->query();
+                    SCMStringBuffer ssb;
+                    cur.getDescription(ssb);
+                    if (0 == stricmp(ssb.str(), "archive"))
+                    {
+                        cur.getName(ssb);
+                        if (ssb.length())
+                        {
+                            fs.clear().append("ZAPReport_").append(req.getWuid()).append('_').append(userName.str()).append(".archive");
+                            try
+                            {
+                                archiveContents.loadFile(ssb.str());
+                                zipper->addContentToZIP(archiveContents.length(), (void*)archiveContents.str(), (char*)fs.str(), true);
+                            }
+                            catch (IException *E)
+                            {
+                                DBGLOG("Error accessing archive file %s", ssb.str());
+                                E->Release();
+                            }
+                            break;
+                        }
+                    }
+                }
+
+                //Add Query
                 query->getQueryText(temp);
                 if (temp.length())
                 {
-                    fs.clear().append("ZAPReport_").append(req.getWuid()).append('_').append(userName.str()).append(".");
-                    fs.append(isArchiveQuery(temp.str()) ? "archive" : "ecl");
-                    ecl.append(temp.str());
-                    zipper->addContentToZIP(ecl.length(), (void*)ecl.str(), (char*)fs.str(), true);
+                    fs.clear().append("ZAPReport_").append(req.getWuid()).append('_').append(userName.str()).append(".ecl");
+                    eclContents.append(temp.str());
+                    zipper->addContentToZIP(eclContents.length(), (void*)eclContents.str(), (char*)fs.str(), true);
                 }
             }
 

+ 51 - 43
roxie/ccd/ccddali.cpp

@@ -419,43 +419,44 @@ public:
         // NOTE - we rely on the fact that  queryNamedGroupStore().lookup caches results,to avoid excessive load on remote dali
         if (_lfn && !strnicmp(_lfn, "foreign", 7)) //if need to support dali hopping should add each remote location
             return NULL;
-        if (!fdesc || !fdesc->queryProperties().hasProp("@cloneFrom"))
+        if (!fdesc)
             return NULL;
-        if (fdesc->queryProperties().hasProp("cloneFromGroup") && fdesc->queryProperties().hasProp("@cloneFromDir"))
+        const char *cloneFrom = fdesc->queryProperties().queryProp("@cloneFrom");
+        if (!cloneFrom)
+            return NULL;
+        StringBuffer foreignLfn("foreign::");
+        foreignLfn.append(cloneFrom);
+        if (!connected())
+            return resolveCachedLFN(foreignLfn);  // Note - cache only used when no dali connection available
+        try
         {
-            try
+            if (fdesc->queryProperties().hasProp("cloneFromGroup") && fdesc->queryProperties().hasProp("@cloneFromDir"))
             {
-                return recreateCloneSource(fdesc, _lfn);
+                Owned<IFileDescriptor> ret = recreateCloneSource(fdesc, _lfn);
+                if (cacheIt)
+                    cacheFileDescriptor(foreignLfn, ret);
+                return ret.getClear();
             }
-            catch (IException *E)
+            else // Legacy mode - recently cloned files should have the extra info
             {
-                E->Release();
-                return NULL;
+                if (traceLevel > 1)
+                    DBGLOG("checkClonedFromRemote: Resolving %s in legacy mode", _lfn);
+                Owned<IDistributedFile> cloneFile = resolveLFN(foreignLfn, cacheIt, false);
+                if (cloneFile)
+                {
+                    Owned<IFileDescriptor> cloneFDesc = cloneFile->getFileDescriptor();
+                    if (cloneFDesc->numParts()==fdesc->numParts())
+                        return cloneFDesc.getClear();
+
+                    DBGLOG(ROXIE_MISMATCH, "File %s cloneFrom(%s) mismatch", _lfn, cloneFrom);
+                }
             }
         }
-        else // Legacy mode - recently cloned files should have the extra info
+        catch (IException *E)
         {
-            if (traceLevel > 1)
-                DBGLOG("checkClonedFromRemote: Resolving %s in legacy mode", _lfn);
-            SocketEndpoint cloneFrom;
-            cloneFrom.set(fdesc->queryProperties().queryProp("@cloneFrom"));
-            if (cloneFrom.isNull())
-                return NULL;
-            CDfsLogicalFileName lfn;
-            lfn.set(_lfn);
-            lfn.setForeign(cloneFrom, false);
-            if (!connected())
-                return resolveCachedLFN(lfn.get());
-            Owned<IDistributedFile> cloneFile = resolveLFN(lfn.get(), cacheIt, false);
-            if (cloneFile)
-            {
-                Owned<IFileDescriptor> cloneFDesc = cloneFile->getFileDescriptor();
-                if (cloneFDesc->numParts()==fdesc->numParts())
-                    return cloneFDesc.getClear();
-
-                StringBuffer s;
-                DBGLOG(ROXIE_MISMATCH, "File %s cloneFrom(%s) mismatch", _lfn, cloneFrom.getIpText(s).str());
-            }
+            if (traceLevel > 3)
+                EXCLOG(E);
+            E->Release();  // Any failure means act as if no remote info
         }
         return NULL;
     }
@@ -475,20 +476,7 @@ public:
                     dfsFile.clear();
             }
             if (cacheIt)
-            {
-                Owned<IFileDescriptor> fd;
-                Owned<IPropertyTree> pt;
-                if (dfsFile)
-                {
-                    fd.setown(dfsFile->getFileDescriptor());
-                    if (fd)
-                        pt.setown(fd->getFileTree());
-                }
-                StringBuffer xpath("Files/");
-                StringBuffer lcname;
-                xpath.append(lcname.append(logicalName).toLowerCase());
-                writeCache(xpath.str(), xpath.str(), pt);
-            }
+                cacheDistributedFile(logicalName, dfsFile);
             if (traceLevel > 1)
                 DBGLOG("Dali lookup %s returned %s in %u ms", logicalName, dfsFile != NULL ? "match" : "NO match", msTick()-start);
             return dfsFile.getClear();
@@ -712,7 +700,27 @@ public:
             }
         }
     }
+protected:
+    void cacheDistributedFile(const char *logicalName, IDistributedFile *dfsFile)
+    {
+        assertex(isConnected);
+        Owned<IFileDescriptor> fd;
+        if (dfsFile)
+            fd.setown(dfsFile->getFileDescriptor());
+        cacheFileDescriptor(logicalName, fd);
+    }
 
+    void cacheFileDescriptor(const char *logicalName, IFileDescriptor *fd)
+    {
+        assertex(isConnected);
+        Owned<IPropertyTree> pt;
+        if (fd)
+            pt.setown(fd->getFileTree());
+        StringBuffer xpath("Files/");
+        StringBuffer lcname;
+        xpath.append(lcname.append(logicalName).toLowerCase());
+        writeCache(xpath.str(), xpath.str(), pt);
+    }
 };
 
 class CRoxieDllServer : public CInterface, implements IDllServer

+ 2 - 2
roxie/ccd/ccdquery.cpp

@@ -1081,10 +1081,10 @@ public:
                     if (thisGraphNameStr.length() && (stricmp(graphName, thisGraphNameStr.s.str()) != 0))
                         continue; // not interested in this one
                 }
-                reply.appendf("<Graph id='%s'><xgmml><graph>", thisGraphNameStr.s.str());
+                reply.appendf("<Graph id='%s'><xgmml>", thisGraphNameStr.s.str());
                 Owned<IPropertyTree> graphXgmml = graphs->query().getXGMMLTree(false);
                 getGraphStats(reply, *graphXgmml);
-                reply.append("</graph></xgmml></Graph>");
+                reply.append("</xgmml></Graph>");
             }
         }
     }

+ 4 - 0
roxie/ccd/ccdstate.cpp

@@ -2143,6 +2143,10 @@ private:
                 else
                     allQueryPackages->resetStats(NULL, logctx);
             }
+            else if (stricmp(queryName, "control:resetremotedalicache")==0)
+            {
+                queryNamedGroupStore().resetCache();
+            }
             else if (stricmp(queryName, "control:restart")==0)
             {
                 FatalError("Roxie process restarted by operator request");

+ 2 - 1
thorlcr/activities/diskread/thdiskread.cpp

@@ -30,7 +30,7 @@ class CDiskReadMasterVF : public CDiskReadMasterBase
 {
 public:
     CDiskReadMasterVF(CMasterGraphElement *info) : CDiskReadMasterBase(info) { }
-    virtual void validateFile()
+    virtual void validateFile(IDistributedFile *file)
     {
         IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper();
         bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags());
@@ -165,6 +165,7 @@ public:
         {
             if (!helper->hasSegmentMonitors() && !helper->hasFilter() && !(helper->getFlags() & TDXtemporary))
             {
+                Owned<IDistributedFile> file = queryThorFileManager().lookup(container.queryJob(), fileName, 0 != ((TDXtemporary|TDXjobtemp) & helper->getFlags()), 0 != (TDRoptional & helper->getFlags()));
                 if (file.get() && canMatch)
                 {
                     if (0 != (TDRunfilteredcount & helper->getFlags()) && file->queryAttributes().hasProp("@recordCount"))

+ 4 - 3
thorlcr/activities/hashdistrib/thhashdistrib.cpp

@@ -127,7 +127,7 @@ public:
 class IndexDistributeActivityMaster : public HashDistributeMasterBase
 {
     MemoryBuffer tlkMb;
-    Owned<IDistributedFile> file;
+    OwnedRoxieString indexFileName;
 
 public:
     IndexDistributeActivityMaster(CMasterGraphElement *info) : HashDistributeMasterBase(DM_index, info) { }
@@ -139,9 +139,9 @@ public:
         IHThorKeyedDistributeArg *helper = (IHThorKeyedDistributeArg *)queryHelper();
 
         StringBuffer scoped;
-        OwnedRoxieString indexFileName(helper->getIndexFileName());
+        indexFileName.setown(helper->getIndexFileName());
         queryThorFileManager().addScope(container.queryJob(), indexFileName, scoped);
-        file.setown(queryThorFileManager().lookup(container.queryJob(), indexFileName));
+        Owned<IDistributedFile> file = queryThorFileManager().lookup(container.queryJob(), indexFileName);
         if (!file)
             throw MakeActivityException(this, 0, "KeyedDistribute: Failed to find key: %s", scoped.str());
         if (0 == file->numParts())
@@ -173,6 +173,7 @@ public:
     virtual void done()
     {
         HashDistributeMasterBase::done();
+        Owned<IDistributedFile> file = queryThorFileManager().lookup(container.queryJob(), indexFileName, false, true);
         if (file)
             file->setAccessed();
     }

+ 11 - 7
thorlcr/activities/keypatch/thkeypatch.cpp

@@ -31,7 +31,7 @@ class CKeyPatchMaster : public CMasterActivity
     bool local;
     unsigned width;
     StringArray clusters;
-    Owned<IDistributedFile> originalIndexFile, patchFile;
+    OwnedRoxieString originalName, patchName;
 
 public:
     CKeyPatchMaster(CMasterGraphElement *info) : CMasterActivity(info)
@@ -44,10 +44,10 @@ public:
     {
         helper = (IHThorKeyPatchArg *)queryHelper();
 
-        OwnedRoxieString originalName(helper->getOriginalName());
-        OwnedRoxieString patchName(helper->getPatchName());
-        originalIndexFile.setown(queryThorFileManager().lookup(container.queryJob(), originalName));
-        patchFile.setown(queryThorFileManager().lookup(container.queryJob(), patchName));
+        originalName.setown(helper->getOriginalName());
+        patchName.setown(helper->getPatchName());
+        Owned<IDistributedFile> originalIndexFile = queryThorFileManager().lookup(container.queryJob(), originalName);
+        Owned<IDistributedFile> patchFile = queryThorFileManager().lookup(container.queryJob(), patchName);
         
         if (originalIndexFile->numParts() != patchFile->numParts())
             throw MakeActivityException(this, TE_KeyPatchIndexSizeMismatch, "Index %s and patch %s differ in width", originalName.get(), patchName.get());
@@ -164,8 +164,12 @@ public:
 
         container.queryTempHandler()->registerFile(outputName, container.queryOwner().queryGraphId(), 0, false, WUFileStandard, &clusters);
         queryThorFileManager().publish(container.queryJob(), outputName, false, *newIndexDesc);
-        originalIndexFile->setAccessed();
-        patchFile->setAccessed();
+        Owned<IDistributedFile> originalIndexFile = queryThorFileManager().lookup(container.queryJob(), originalName, false, true);
+        if (originalIndexFile)
+	        originalIndexFile->setAccessed();
+        Owned<IDistributedFile> patchFile = queryThorFileManager().lookup(container.queryJob(), patchName, false, true);
+        if (patchFile)
+	        patchFile->setAccessed();
     }
     void preStart(size32_t parentExtractSz, const byte *parentExtract)
     {

+ 10 - 6
thorlcr/activities/msort/thmsort.cpp

@@ -56,8 +56,8 @@ class CMSortActivityMaster : public CMasterActivity
     IThorSorterMaster *imaster;
     mptag_t mpTagRPC, barrierMpTag;
     Owned<IBarrier> barrier;
-    Owned<IDistributedFile> coSortFile;
-    
+    OwnedRoxieString cosortlogname;
+
 public:
     CMSortActivityMaster(CMasterGraphElement *info)
       : CMasterActivity(info)
@@ -85,6 +85,7 @@ protected:
             Owned<IException> e = MakeActivityException(this, 0, "Ignoring, unsupported sort order algorithm '%s'", algoname.get());
             reportExceptionToWorkunit(container.queryJob().queryWorkUnit(), e);
         }
+        cosortlogname.setown(helper->getSortedFilename());
     }
     virtual void serializeSlaveData(MemoryBuffer &dst, unsigned slave)
     {
@@ -137,10 +138,9 @@ protected:
                 skewThreshold = container.queryJob().getWorkUnitValueInt("defaultSkewThreshold", 0);
         }
         StringBuffer cosortfilenames;
-        OwnedRoxieString cosortlogname(helper->getSortedFilename());
         if (cosortlogname&&*cosortlogname)
         {
-            coSortFile.setown(queryThorFileManager().lookup(container.queryJob(), cosortlogname));
+            Owned<IDistributedFile> coSortFile = queryThorFileManager().lookup(container.queryJob(), cosortlogname);
             Owned<IFileDescriptor> fileDesc = coSortFile->getFileDescriptor();
             queryThorFileManager().noteFileRead(container.queryJob(), coSortFile);
             unsigned o;
@@ -195,8 +195,12 @@ protected:
     {
         ActPrintLog("done");
         CMasterActivity::done();
-        if (coSortFile)
-            coSortFile->setAccessed();
+        if (cosortlogname&&*cosortlogname)
+        {
+            Owned<IDistributedFile> coSortFile = queryThorFileManager().lookup(container.queryJob(), cosortlogname, false, true);
+            if (coSortFile)
+                coSortFile->setAccessed();
+        }
         ActPrintLog("done exit");
     }
 };

+ 3 - 2
thorlcr/activities/thdiskbase.cpp

@@ -39,7 +39,7 @@ void CDiskReadMasterBase::init()
 {
     IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *) queryHelper();
     fileName.setown(helper->getFileName());
-    file.setown(queryThorFileManager().lookup(container.queryJob(), fileName, 0 != ((TDXtemporary|TDXjobtemp) & helper->getFlags()), 0 != (TDRoptional & helper->getFlags()), true));
+    Owned<IDistributedFile> file = queryThorFileManager().lookup(container.queryJob(), fileName, 0 != ((TDXtemporary|TDXjobtemp) & helper->getFlags()), 0 != (TDRoptional & helper->getFlags()), true);
 
     if (file)
     {
@@ -74,7 +74,7 @@ void CDiskReadMasterBase::init()
                 }
             }
         }
-        validateFile();
+        validateFile(file);
         void *ekey;
         size32_t ekeylen;
         helper->getEncryptKey(ekeylen,ekey);
@@ -116,6 +116,7 @@ void CDiskReadMasterBase::done()
     fileDesc.clear();
     if (!abortSoon) // in case query has relinquished control of file usage to another query (e.g. perists)
     {
+        Owned<IDistributedFile> file = queryThorFileManager().lookup(container.queryJob(), fileName, 0 != ((TDXtemporary|TDXjobtemp) & helper->getFlags()), 0 != (TDRoptional & helper->getFlags()), true);
         if (file)
             file->setAccessed();
     }

+ 1 - 2
thorlcr/activities/thdiskbase.ipp

@@ -33,14 +33,13 @@ protected:
     IHash *hash;
     Owned<ProgressInfo> inputProgress;
     OwnedRoxieString fileName;
-    Owned<IDistributedFile> file;
 
 public:
     CDiskReadMasterBase(CMasterGraphElement *info);
     void init();
     void serializeSlaveData(MemoryBuffer &dst, unsigned slave);
     void done();
-    virtual void validateFile() { }
+    virtual void validateFile(IDistributedFile *file) { }
     void deserializeStats(unsigned node, MemoryBuffer &mb);
     void getXGMML(unsigned idx, IPropertyTree *edge);
 };

+ 2 - 0
thorlcr/graph/thgraph.cpp

@@ -891,6 +891,8 @@ bool isGlobalActivity(CGraphElementBase &container)
         {
             Owned<IHThorCsvReadArg> helper = (IHThorCsvReadArg *)container.helperFactory();
             // if header lines, then [may] need to co-ordinate across slaves
+            if (container.queryOwner().queryOwner() && (!container.queryOwner().isGlobal())) // I am in a child query
+                return false;
             return helper->queryCsvParameters()->queryHeaderLen() > 0;
         }
 // dependent on child acts?