Procházet zdrojové kódy

Merge branch 'candidate-7.4.x'

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman před 6 roky
rodič
revize
baac80f77b

+ 0 - 9
esp/src/eclwatch/ActivityWidget.js

@@ -293,15 +293,6 @@ define([
 
 
                 this.openButton = registry.byId(this.id + "Open");
                 this.openButton = registry.byId(this.id + "Open");
                 this.refreshButton = registry.byId(this.id + "Refresh");
                 this.refreshButton = registry.byId(this.id + "Refresh");
-                this.autoRefreshButton = new ToggleButton({
-                    id: this.id + "AutoRefresh",
-                    iconClass: 'iconAutoRefresh',
-                    showLabel: false,
-                    checked: false,
-                    onClick: function (event) {
-                        context._onAutoRefresh(event);
-                    }
-                }).placeAt(this.refreshButton.domNode, "before");
                 var tmpSplitter = new ToolbarSeparator().placeAt(this.refreshButton.domNode, "before");
                 var tmpSplitter = new ToolbarSeparator().placeAt(this.refreshButton.domNode, "before");
                 this.clusterPauseButton = new Button({
                 this.clusterPauseButton = new Button({
                     id: this.id + "PauseButton",
                     id: this.id + "PauseButton",

+ 1 - 0
esp/src/eclwatch/templates/ActivityPageWidget.html

@@ -3,6 +3,7 @@
         <div id="${id}TabContainer" data-dojo-props="region: 'center', tabPosition: 'top'" style="width: 100%; height: 100%" data-dojo-type="dijit.layout.TabContainer">
         <div id="${id}TabContainer" data-dojo-props="region: 'center', tabPosition: 'top'" style="width: 100%; height: 100%" data-dojo-type="dijit.layout.TabContainer">
             <div id="${id}_Grid" style="width: 100%; height: 100%" data-dojo-props='title:"${gridTitle}"' data-dojo-type="dijit.layout.BorderContainer">
             <div id="${id}_Grid" style="width: 100%; height: 100%" data-dojo-props='title:"${gridTitle}"' data-dojo-type="dijit.layout.BorderContainer">
                 <div id="${id}Toolbar" class="topPanel" data-dojo-props="region: 'top'" data-dojo-type="dijit.Toolbar">
                 <div id="${id}Toolbar" class="topPanel" data-dojo-props="region: 'top'" data-dojo-type="dijit.Toolbar">
+                    <div id="${id}AutoRefresh" data-dojo-attach-event="onClick:_onAutoRefresh" data-dojo-props="iconClass:'iconAutoRefresh', showLabel:false, checked: false" data-dojo-type="dijit.form.ToggleButton">${i18n.AutoRefresh}</div>
                     <div id="${id}Refresh" data-dojo-attach-event="onClick:_onRefresh" data-dojo-props="iconClass:'iconRefresh'" data-dojo-type="dijit.form.Button">${i18n.Refresh}</div>
                     <div id="${id}Refresh" data-dojo-attach-event="onClick:_onRefresh" data-dojo-props="iconClass:'iconRefresh'" data-dojo-type="dijit.form.Button">${i18n.Refresh}</div>
                     <span data-dojo-type="dijit.ToolbarSeparator"></span>
                     <span data-dojo-type="dijit.ToolbarSeparator"></span>
                     <div id="${id}Open" data-dojo-attach-event="onClick:_onOpen" data-dojo-type="dijit.form.Button">${i18n.Open}</div>
                     <div id="${id}Open" data-dojo-attach-event="onClick:_onOpen" data-dojo-type="dijit.form.Button">${i18n.Open}</div>

+ 2 - 1
esp/src/src/ESPActivity.ts

@@ -171,7 +171,8 @@ var Activity = declare([ESPUtil.Singleton, ESPUtil.Monitor], {
                 }
                 }
                 var wu = item.Server === "DFUserver" ? ESPDFUWorkunit.Get(item.Wuid) : ESPWorkunit.Get(item.Wuid);
                 var wu = item.Server === "DFUserver" ? ESPDFUWorkunit.Get(item.Wuid) : ESPWorkunit.Get(item.Wuid);
                 wu.updateData(lang.mixin({
                 wu.updateData(lang.mixin({
-                    __hpcc_id: item.Wuid
+                    __hpcc_id: item.Wuid,
+                    component: "ActivityWidget"
                 }, item));
                 }, item));
                 if (!wu.isComplete || !wu.isComplete()) {
                 if (!wu.isComplete || !wu.isComplete()) {
                     queue.addChild(wu);
                     queue.addChild(wu);

+ 1 - 1
esp/src/src/ESPWorkunit.ts

@@ -190,7 +190,7 @@ var Workunit = declare([ESPUtil.Singleton], {  // jshint ignore:line
         if (justCompleted) {
         if (justCompleted) {
             topic.publish("hpcc/ecl_wu_completed", this);
             topic.publish("hpcc/ecl_wu_completed", this);
         }
         }
-        if (!this.hasCompleted) {
+        if (!this.hasCompleted && this.component !== "ActivityWidget") {
             this.startMonitor();
             this.startMonitor();
         }
         }
     },
     },

+ 67 - 0
fs/dafsserver/dafsserver.cpp

@@ -3209,6 +3209,62 @@ class CRemoteFileServer : implements IRemoteFileServer, public CInterface
     unsigned targetActiveThreads;
     unsigned targetActiveThreads;
     Linked<IPropertyTree> keyPairInfo;
     Linked<IPropertyTree> keyPairInfo;
 
 
+    class CHandleTracer
+    {
+        CTimeMon timer;
+        CriticalSection crit;
+        Owned<IFile> stdIOIFile;
+        std::vector<Owned<IFileIO>> reservedHandles;
+        unsigned handlesToReserve = 3; // need a few for pipe process to succeed
+
+        void reserveHandles()
+        {
+            if (stdIOIFile)
+            {
+                for (unsigned r=0; r<handlesToReserve; r++)
+                {
+                    IFileIO *iFileIO = stdIOIFile->open(IFOread);
+                    if (iFileIO)
+                        reservedHandles.push_back(iFileIO);
+                }
+            }
+        }
+        void releaseHandles()
+        {
+            reservedHandles.clear();
+        }
+    public:
+        CHandleTracer()
+        {
+            /* Reserve handles, so that when we run out, we hope to release them
+             * and thereby have enough to use when reading current state.
+             */
+            stdIOIFile.setown(createIFile("stdout:"));
+            timer.reset(0);
+            reserveHandles();
+        }
+        void traceIfReady()
+        {
+            CriticalBlock b(crit);
+            if (timer.timedout())
+            {
+                DBGLOG("Open handles:");
+                releaseHandles();
+                /* NB: can't guarantee that handles will be available after releaseHandles(), if other threads have allocated them.
+                 * If printLsOf fails, mark timer to retry again on next event in shorter time period.
+                 */
+                if (!printLsOf())
+                {
+                    DBGLOG("Failed to run lsof");
+                    timer.reset(1000); // next attempt in >=1 second
+                }
+                else
+                    timer.reset(60*1000); // next trace in >=1 minute
+                reserveHandles();
+            }
+        }
+    } handleTracer;
+
     int getNextHandle()
     int getNextHandle()
     {
     {
         // called in sect critical block
         // called in sect critical block
@@ -4766,8 +4822,10 @@ public:
         }
         }
         catch (IException *e)
         catch (IException *e)
         {
         {
+            checkOutOfHandles(e);
             reply.setWritePos(posOfErr);
             reply.setWritePos(posOfErr);
             formatException(reply, e, cmd, testSocketFlag, 0, client);
             formatException(reply, e, cmd, testSocketFlag, 0, client);
+            e->Release();
         }
         }
         return testSocketFlag;
         return testSocketFlag;
     }
     }
@@ -4777,6 +4835,12 @@ public:
         return new cCommandProcessor();
         return new cCommandProcessor();
     }
     }
 
 
+    void checkOutOfHandles(IException *exception)
+    {
+        if (EMFILE == exception->errorCode())
+            handleTracer.traceIfReady();
+    }
+
     virtual void run(DAFSConnectCfg _connectMethod, const SocketEndpoint &listenep, unsigned sslPort, const SocketEndpoint *rowServiceEp, bool _rowServiceSSL, bool _rowServiceOnStdPort) override
     virtual void run(DAFSConnectCfg _connectMethod, const SocketEndpoint &listenep, unsigned sslPort, const SocketEndpoint *rowServiceEp, bool _rowServiceSSL, bool _rowServiceOnStdPort) override
     {
     {
         SocketEndpoint sslep(listenep);
         SocketEndpoint sslep(listenep);
@@ -4950,6 +5014,7 @@ public:
                     if (exception)
                     if (exception)
                     {
                     {
                         EXCLOG(exception, "CRemoteFileServer");
                         EXCLOG(exception, "CRemoteFileServer");
+                        checkOutOfHandles(exception);
                         exception.clear();
                         exception.clear();
                         sockavail = false;
                         sockavail = false;
                     }
                     }
@@ -4991,6 +5056,7 @@ public:
                         sockSSL.clear();
                         sockSSL.clear();
                         cleanupDaFsSocket(ssock);
                         cleanupDaFsSocket(ssock);
                         ssock.clear();
                         ssock.clear();
+                        checkOutOfHandles(exception);
                         exception.clear();
                         exception.clear();
                         securesockavail = false;
                         securesockavail = false;
                     }
                     }
@@ -5025,6 +5091,7 @@ public:
                         acceptedRSSock.clear();
                         acceptedRSSock.clear();
                         cleanupDaFsSocket(ssock);
                         cleanupDaFsSocket(ssock);
                         ssock.clear();
                         ssock.clear();
+                        checkOutOfHandles(exception);
                         exception.clear();
                         exception.clear();
                         rowServiceSockAvail = false;
                         rowServiceSockAvail = false;
                     }
                     }

+ 78 - 2
system/jlib/jdebug.cpp

@@ -22,6 +22,7 @@
 #include "jhash.hpp"
 #include "jhash.hpp"
 #include "jmisc.hpp"
 #include "jmisc.hpp"
 #include "jexcept.hpp"
 #include "jexcept.hpp"
+#include "jfile.hpp"
 #include "jmutex.hpp"
 #include "jmutex.hpp"
 #include "jtime.hpp"
 #include "jtime.hpp"
 #include <stdio.h>
 #include <stdio.h>
@@ -2456,8 +2457,10 @@ public:
             fclose(netfp);
             fclose(netfp);
             return false;
             return false;
         }
         }
+        bool firstTime = false;
         if (!columnNames.length())
         if (!columnNames.length())
         {
         {
+            firstTime = true;
             columnNames.appendList(ln, " ");
             columnNames.appendList(ln, " ");
             ForEachItemInRev(idx, columnNames)
             ForEachItemInRev(idx, columnNames)
             {
             {
@@ -2510,12 +2513,14 @@ public:
                     }
                     }
                     if (queue > ret->rx_queue)
                     if (queue > ret->rx_queue)
                     {
                     {
-                        DBGLOG("UDP queue: new max rx_queue: port %d rx_queue=%u drops=%u", port, queue, drops);
+                        if (!firstTime)
+                            DBGLOG("UDP queue: new max rx_queue: port %d rx_queue=%u drops=%u", port, queue, drops);
                         ret->rx_queue = queue;
                         ret->rx_queue = queue;
                     }
                     }
                     if (drops > ret->drops)
                     if (drops > ret->drops)
                     {
                     {
-                        LOG(MCoperatorError, unknownJob, "DROPPED UDP PACKETS: port %d rx_queue=%u (peak %u) drops=%u (total %i)", port, queue, ret->rx_queue, drops-ret->drops, drops);
+                        if (!firstTime)
+                            LOG(MCoperatorError, unknownJob, "DROPPED UDP PACKETS: port %d rx_queue=%u (peak %u) drops=%u (total %i)", port, queue, ret->rx_queue, drops-ret->drops, drops);
                         ret->drops = drops;
                         ret->drops = drops;
                     }
                     }
                 }
                 }
@@ -4019,3 +4024,74 @@ jlib_decl IUserMetric *createUserMetric(const char *name, const char *matchStrin
 {
 {
     return new UserMetricMsgHandler(name, matchString);
     return new UserMetricMsgHandler(name, matchString);
 }
 }
+
+jlib_decl bool printProcessHandles(pid_t pid)
+{
+#if defined(__linux__)
+    StringBuffer curFilePathSB("/proc/");
+    if (pid)
+        curFilePathSB.append(pid);
+    else
+        curFilePathSB.append("self");
+    curFilePathSB.append("/fd/");
+    size32_t tailPos = curFilePathSB.length();
+
+    Owned<IFile> fdDir = createIFile(curFilePathSB);
+    if (!fdDir)
+    {
+        WARNLOG("Failed to create IFile for %s", curFilePathSB.str());
+        return false;
+    }
+    Owned<IDirectoryIterator> dirIter = fdDir->directoryFiles();
+    StringBuffer linkedFileNameSB, curFileNameSB;
+    char *linkedFileName = linkedFileNameSB.reserveTruncate(PATH_MAX);
+    ForEach(*dirIter)
+    {
+        dirIter->getName(curFileNameSB.clear());
+        curFilePathSB.setLength(tailPos);
+        curFilePathSB.append(curFileNameSB);
+        struct stat st;
+        int err = lstat(curFilePathSB, &st);
+        if (0 == err)
+        {
+            ssize_t sz = readlink(curFilePathSB, linkedFileName, PATH_MAX-1);
+            if (-1 != sz)
+            {
+                linkedFileNameSB.setLength(sz);
+                DBGLOG("%s -> %s", curFileNameSB.str(), linkedFileNameSB.str());
+            }
+        }
+        else
+        {
+            Owned<IException> e = makeErrnoExceptionV(errno, "Failed: err=%d", err);
+            EXCLOG(e, nullptr);
+        }
+    }
+#else
+// JCSMORE - other OS implementations
+#endif
+    return true;
+}
+
+jlib_decl bool printLsOf(pid_t pid)
+{
+#if defined(__linux__)
+    if (!pid)
+        pid = getpid();
+    // Use lsof to output handles of files and sockets
+    VStringBuffer cmd("lsof -n -P -d '^mem,^rtd,^txt,^cwd' -f -a -p %u", pid);
+    Owned<IPipeProcess> pipe = createPipeProcess();
+    if (!pipe->run("lsof", cmd, nullptr, false, true, false, 0, true))
+        return false;
+    Owned<ISimpleReadStream> stream = pipe->getOutputStream();
+    Owned<IStreamLineReader> lineReader = createLineReader(stream, false);
+    StringBuffer line;
+    while (!lineReader->readLine(line.clear()))
+        DBGLOG("%s", line.str());
+
+#else
+// JCSMORE - other OS implementations
+#endif
+    return true;
+}
+

+ 2 - 0
system/jlib/jdebug.hpp

@@ -440,6 +440,8 @@ extern jlib_decl void clearAffinityCache(); // should be called whenever the pro
 extern jlib_decl void printProcMap(const char *fn, bool printbody, bool printsummary, StringBuffer *lnout, MemoryBuffer *mb, bool useprintf);
 extern jlib_decl void printProcMap(const char *fn, bool printbody, bool printsummary, StringBuffer *lnout, MemoryBuffer *mb, bool useprintf);
 extern jlib_decl void PrintMemoryReport(bool full=true);
 extern jlib_decl void PrintMemoryReport(bool full=true);
 extern jlib_decl void printAllocationSummary();
 extern jlib_decl void printAllocationSummary();
+extern jlib_decl bool printProcessHandles(pid_t pid=0); // returns false if fails
+extern jlib_decl bool printLsOf(pid_t pid=0); // returns false if fails
 extern jlib_decl bool areTransparentHugePagesEnabled(HugePageMode mode);
 extern jlib_decl bool areTransparentHugePagesEnabled(HugePageMode mode);
 extern jlib_decl HugePageMode queryTransparentHugePagesMode();
 extern jlib_decl HugePageMode queryTransparentHugePagesMode();
 extern jlib_decl memsize_t getHugePageSize();
 extern jlib_decl memsize_t getHugePageSize();

+ 8 - 9
system/jlib/jthread.cpp

@@ -1988,15 +1988,14 @@ public:
         int inpipe[2];
         int inpipe[2];
         int outpipe[2];
         int outpipe[2];
         int errpipe[2];
         int errpipe[2];
-        if (hasinput)
-            if (::pipe(inpipe)==-1)
-                throw makeOsException(errno);
-        if (hasoutput)
-            if (::pipe(outpipe)==-1)
-                throw makeOsException(errno);
-        if (haserror)
-            if (::pipe(errpipe)==-1)
-                throw makeOsException(errno);
+        if ((hasinput && (::pipe(inpipe)==-1)) ||
+            (hasoutput && (::pipe(outpipe)==-1)) ||
+            (haserror && (::pipe(errpipe)==-1)))
+        {
+            retcode = START_FAILURE;
+            started.signal();
+            throw makeOsException(errno);
+        }
 
 
         /* NB: Important to call splitargs (which calls malloc) before the fork()
         /* NB: Important to call splitargs (which calls malloc) before the fork()
          * and not in the child process. Because performing malloc in the child
          * and not in the child process. Because performing malloc in the child