Преглед на файлове

HPCC-22538 Add a helper function that dumps file and socket info

Based on lsof output, output which files a process has open,
and the sockets (and their endpoints) the process is using.

Call it in dafilesrv, for diagnostic purposes, when the process
runs out of handles.

Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
Jake Smith преди 6 години
родител
ревизия
051f33bc98
променени са 3 файла, в които са добавени 141 реда и са изтрити 0 реда
  1. 67 0
      fs/dafsserver/dafsserver.cpp
  2. 72 0
      system/jlib/jdebug.cpp
  3. 2 0
      system/jlib/jdebug.hpp

+ 67 - 0
fs/dafsserver/dafsserver.cpp

@@ -3209,6 +3209,62 @@ class CRemoteFileServer : implements IRemoteFileServer, public CInterface
     unsigned targetActiveThreads;
     Linked<IPropertyTree> keyPairInfo;
 
+    /* Diagnostic helper: logs the handles this process has open (via printLsOf()),
+     * no more often than the interval tracked by 'timer'.
+     * It keeps a few spare handles open so that, when the process has run out of
+     * handles, they can be released to give the trace a chance of running.
+     */
+    class CHandleTracer
+    {
+        CTimeMon timer;                 // governs when the next trace may run
+        CriticalSection crit;           // serializes callers of traceIfReady()
+        Owned<IFile> stdIOIFile;        // "stdout:" - opened repeatedly to hold the spare handles
+        std::vector<Owned<IFileIO>> reservedHandles;
+        unsigned handlesToReserve = 3; // need a few for pipe process to succeed
+
+        // Open up to 'handlesToReserve' spare handles; an open() that returns null is skipped.
+        void reserveHandles()
+        {
+            if (stdIOIFile)
+            {
+                for (unsigned r=0; r<handlesToReserve; r++)
+                {
+                    IFileIO *iFileIO = stdIOIFile->open(IFOread);
+                    if (iFileIO)
+                        reservedHandles.push_back(iFileIO); // the Owned<> element takes ownership
+                }
+            }
+        }
+        // Drop all the spare handles so that they become available to the trace.
+        void releaseHandles()
+        {
+            reservedHandles.clear();
+        }
+    public:
+        CHandleTracer()
+        {
+            /* Reserve handles, so that when we run out, we hope to release them
+             * and thereby have enough to use when reading current state.
+             */
+            stdIOIFile.setown(createIFile("stdout:"));
+            timer.reset(0); // zero timeout - presumably makes the first traceIfReady() trace immediately
+            reserveHandles();
+        }
+        /* Log the open handles if the timer has expired, otherwise do nothing.
+         * After a successful trace the next one is allowed in >=1 minute,
+         * after a failed one in >=1 second.
+         * The spare handles are always re-reserved before returning.
+         */
+        void traceIfReady()
+        {
+            CriticalBlock b(crit);
+            if (!timer.timedout())
+                return;
+            DBGLOG("Open handles:");
+            releaseHandles();
+            /* NB: can't guarantee that handles will be available after releaseHandles(), if other threads have allocated them.
+             * If printLsOf fails, mark timer to retry again on next event in shorter time period.
+             */
+            bool traced = false;
+            try
+            {
+                traced = printLsOf();
+            }
+            catch (IException *e)
+            {
+                /* NOTE(review): printLsOf() may throw rather than return false (e.g. if the pipe
+                 * cannot be created for lack of handles) - confirm. Treat that as a failed trace:
+                 * an escaping exception would skip reserveHandles() below and would propagate
+                 * out of the exception handlers this is called from.
+                 */
+                EXCLOG(e, "CHandleTracer");
+                e->Release();
+            }
+            if (!traced)
+            {
+                DBGLOG("Failed to run lsof");
+                timer.reset(1000); // next attempt in >=1 second
+            }
+            else
+                timer.reset(60*1000); // next trace in >=1 minute
+            reserveHandles();
+        }
+    } handleTracer;
+
     int getNextHandle()
     {
         // called in sect critical block
@@ -4766,8 +4822,10 @@ public:
         }
         catch (IException *e)
         {
+            checkOutOfHandles(e);
             reply.setWritePos(posOfErr);
             formatException(reply, e, cmd, testSocketFlag, 0, client);
+            e->Release();
         }
         return testSocketFlag;
     }
@@ -4777,6 +4835,12 @@ public:
         return new cCommandProcessor();
     }
 
+    // If 'exception' carries the EMFILE error code, ask the handle tracer to log the
+    // open handles (the tracer itself decides whether it is time to do so).
+    void checkOutOfHandles(IException *exception)
+    {
+        const bool outOfHandles = (exception->errorCode() == EMFILE);
+        if (!outOfHandles)
+            return;
+        handleTracer.traceIfReady();
+    }
+
     virtual void run(DAFSConnectCfg _connectMethod, const SocketEndpoint &listenep, unsigned sslPort, const SocketEndpoint *rowServiceEp, bool _rowServiceSSL, bool _rowServiceOnStdPort) override
     {
         SocketEndpoint sslep(listenep);
@@ -4950,6 +5014,7 @@ public:
                     if (exception)
                     {
                         EXCLOG(exception, "CRemoteFileServer");
+                        checkOutOfHandles(exception);
                         exception.clear();
                         sockavail = false;
                     }
@@ -4991,6 +5056,7 @@ public:
                         sockSSL.clear();
                         cleanupDaFsSocket(ssock);
                         ssock.clear();
+                        checkOutOfHandles(exception);
                         exception.clear();
                         securesockavail = false;
                     }
@@ -5025,6 +5091,7 @@ public:
                         acceptedRSSock.clear();
                         cleanupDaFsSocket(ssock);
                         ssock.clear();
+                        checkOutOfHandles(exception);
                         exception.clear();
                         rowServiceSockAvail = false;
                     }

+ 72 - 0
system/jlib/jdebug.cpp

@@ -22,6 +22,7 @@
 #include "jhash.hpp"
 #include "jmisc.hpp"
 #include "jexcept.hpp"
+#include "jfile.hpp"
 #include "jmutex.hpp"
 #include "jtime.hpp"
 #include <stdio.h>
@@ -4023,3 +4024,74 @@ jlib_decl IUserMetric *createUserMetric(const char *name, const char *matchStrin
 {
     return new UserMetricMsgHandler(name, matchString);
 }
+
+// Log (via DBGLOG) each entry found in /proc/<pid>/fd together with the target of its symlink.
+// pid == 0 means the calling process (/proc/self/fd is used).
+// Returns false only if the IFile for the fd directory could not be created. Failures on
+// individual entries are logged and skipped. On non-Linux builds nothing is logged and
+// true is returned.
+jlib_decl bool printProcessHandles(pid_t pid)
+{
+#if defined(__linux__)
+    StringBuffer curFilePathSB("/proc/");
+    if (pid)
+        curFilePathSB.append(pid);
+    else
+        curFilePathSB.append("self");
+    curFilePathSB.append("/fd/");
+    size32_t tailPos = curFilePathSB.length(); // length of the directory prefix, restored for each entry
+
+    Owned<IFile> fdDir = createIFile(curFilePathSB);
+    if (!fdDir)
+    {
+        WARNLOG("Failed to create IFile for %s", curFilePathSB.str());
+        return false;
+    }
+    Owned<IDirectoryIterator> dirIter = fdDir->directoryFiles();
+    StringBuffer linkedFileNameSB, curFileNameSB;
+    // One buffer, reused for every entry, that readlink() writes the link target into
+    char *linkedFileName = linkedFileNameSB.reserveTruncate(PATH_MAX);
+    ForEach(*dirIter)
+    {
+        dirIter->getName(curFileNameSB.clear());
+        curFilePathSB.setLength(tailPos);
+        curFilePathSB.append(curFileNameSB);
+        struct stat st;
+        if (0 != lstat(curFilePathSB, &st))
+        {
+            int lstatErrno = errno; // capture before any other call can overwrite it
+            Owned<IException> e = makeErrnoExceptionV(lstatErrno, "printProcessHandles: lstat failed for %s", curFilePathSB.str());
+            EXCLOG(e, nullptr);
+            continue;
+        }
+        // readlink() does not NUL-terminate; setLength() below records the returned length
+        ssize_t sz = readlink(curFilePathSB, linkedFileName, PATH_MAX-1);
+        if (-1 == sz)
+        {
+            // Previously ignored silently - report which entry could not be resolved
+            int readlinkErrno = errno;
+            Owned<IException> e = makeErrnoExceptionV(readlinkErrno, "printProcessHandles: readlink failed for %s", curFilePathSB.str());
+            EXCLOG(e, nullptr);
+            continue;
+        }
+        linkedFileNameSB.setLength(sz);
+        DBGLOG("%s -> %s", curFileNameSB.str(), linkedFileNameSB.str());
+    }
+#else
+// JCSMORE - other OS implementations
+#endif
+    return true;
+}
+
+// Run lsof against process 'pid' (0 = the calling process) and log each line it outputs
+// via DBGLOG. Returns false if the pipe process reports that lsof could not be run,
+// true otherwise. On non-Linux builds nothing is done and true is returned.
+jlib_decl bool printLsOf(pid_t pid)
+{
+#if defined(__linux__)
+    // A zero pid means "this process"
+    if (0 == pid)
+        pid = getpid();
+
+    // Use lsof to output handles of files and sockets
+    VStringBuffer lsofCmd("lsof -n -P -d '^mem,^rtd,^txt,^cwd' -f -a -p %u", pid);
+    Owned<IPipeProcess> lsofProcess = createPipeProcess();
+    if (!lsofProcess->run("lsof", lsofCmd, nullptr, false, true, false, 0, true))
+        return false;
+
+    // Copy lsof's output to the log, one line at a time
+    Owned<ISimpleReadStream> lsofOutput = lsofProcess->getOutputStream();
+    Owned<IStreamLineReader> outputReader = createLineReader(lsofOutput, false);
+    StringBuffer outputLine;
+    for (;;)
+    {
+        // readLine() returning true ends the loop (presumably end of stream - confirm)
+        bool done = outputReader->readLine(outputLine.clear());
+        if (done)
+            break;
+        DBGLOG("%s", outputLine.str());
+    }
+
+#else
+// JCSMORE - other OS implementations
+#endif
+    return true;
+}
+

+ 2 - 0
system/jlib/jdebug.hpp

@@ -440,6 +440,8 @@ extern jlib_decl void clearAffinityCache(); // should be called whenever the pro
 extern jlib_decl void printProcMap(const char *fn, bool printbody, bool printsummary, StringBuffer *lnout, MemoryBuffer *mb, bool useprintf);
 extern jlib_decl void PrintMemoryReport(bool full=true);
 extern jlib_decl void printAllocationSummary();
+extern jlib_decl bool printProcessHandles(pid_t pid=0); // logs the open handles of 'pid' (0 = calling process); returns false if fails
+extern jlib_decl bool printLsOf(pid_t pid=0); // logs the lsof output for 'pid' (0 = calling process); returns false if fails
 extern jlib_decl bool areTransparentHugePagesEnabled(HugePageMode mode);
 extern jlib_decl HugePageMode queryTransparentHugePagesMode();
 extern jlib_decl memsize_t getHugePageSize();