
Merge branch 'candidate-7.0.6' into candidate-7.2.0

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 6 years ago
parent
commit
a3dfc0d0f8

+ 12 - 21
plugins/py3embed/py3embed.cpp

@@ -310,6 +310,15 @@ public:
             if (pythonLibrary)
                 FreeSharedObject(pythonLibrary);
         }
+        else
+        {
+            // Need to avoid releasing the associated py objects when these members' destructors are called.
+            namedtuple.getClear();
+            namedtupleTypes.getClear();
+            compiledScripts.getClear();
+            preservedScopes.getClear();
+
+        }
     }
     bool isInitialized()
     {
@@ -554,28 +563,10 @@ MODULE_INIT(INIT_PRIORITY_STANDARD)
         }
     }
 #else
-    FILE *diskfp = fopen("/proc/self/maps", "r");
-    if (diskfp)
+    StringBuffer modname;
+    if (findLoadedModule(modname, "libpy3embed"))
     {
-        char ln[_MAX_PATH];
-        while (fgets(ln, sizeof(ln), diskfp))
-        {
-            if (strstr(ln, "libpy3embed"))
-            {
-                const char *fullName = strchr(ln, '/');
-                if (fullName)
-                {
-                    char *tail = (char *) strstr(fullName, SharedObjectExtension);
-                    if (tail)
-                    {
-                        tail[strlen(SharedObjectExtension)] = 0;
-                        keepLoadedHandle = LoadSharedObject(fullName, false, false);
-                        break;
-                    }
-                }
-            }
-        }
-        fclose(diskfp);
+        keepLoadedHandle = LoadSharedObject(modname, false, false);
     }
 #endif
     return true;
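
The #else branch above replaces hand-rolled /proc/self/maps parsing with jlib's findLoadedModule helper, which looks up the loaded module's full path by name. For readers outside jlib, a minimal standalone sketch of the same self-pinning trick using POSIX dladdr/dlopen follows; the names and error handling are illustrative, not the jlib implementation (link with -ldl):

    #include <dlfcn.h>

    static void *keepLoadedHandle = nullptr;

    // Pin the shared object containing this function in memory: dladdr finds
    // the module's path from a code address, and re-opening it with dlopen
    // bumps its reference count so a later dlclose cannot unmap it.
    static bool pinContainingModule()
    {
        Dl_info info;
        if (!dladdr((void *) &pinContainingModule, &info) || !info.dli_fname)
            return false;
        keepLoadedHandle = dlopen(info.dli_fname, RTLD_NOW | RTLD_GLOBAL);
        return keepLoadedHandle != nullptr;
    }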

+ 10 - 0
plugins/pyembed/pyembed.cpp

@@ -29,6 +29,7 @@
 #include "platform.h"
 #include "frameobject.h"
 #include "jexcept.hpp"
+#include "jutil.hpp"
 #include "jthread.hpp"
 #include "jregexp.hpp"
 #include "hqlplugins.hpp"
@@ -303,6 +304,15 @@ public:
             if (pythonLibrary)
                 FreeSharedObject(pythonLibrary);
         }
+        else
+        {
+            // Need to avoid releasing the associated py objects when these members' destructors are called.
+            namedtuple.getClear();
+            namedtupleTypes.getClear();
+            compiledScripts.getClear();
+            preservedScopes.getClear();
+
+        }
     }
     bool isInitialized()
     {
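
Both embed plugins receive the same destructor change: if the Python library was not unloaded (the pythonLibrary handle is still held), the cached Python objects may outlive a finalised interpreter, so releasing them from these members' destructors could crash. Calling getClear() and discarding the result detaches each pointer without releasing it. A simplified sketch of the idiom, assuming jlib Owned<T>-style semantics:

    // Simplified Owned<T>-style holder: clear() releases the held object,
    // getClear() merely detaches it. Calling getClear() and discarding the
    // result deliberately leaks the object so the destructor won't touch it.
    template <class T>
    class OwnedSketch
    {
        T *ptr = nullptr;
    public:
        ~OwnedSketch() { if (ptr) ptr->Release(); }
        T *getClear() { T *ret = ptr; ptr = nullptr; return ret; } // no Release()
        void clear() { if (ptr) { ptr->Release(); ptr = nullptr; } }
    };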

+ 12 - 1
roxie/roxiemem/roxiemem.cpp

@@ -293,8 +293,19 @@ static void initializeHeap(bool allowHugePages, bool allowTransparentHugePages,
     if (!heapBase)
     {
         const memsize_t hugePageSize = getHugePageSize();
-        bool useTransparentHugePages = allowTransparentHugePages && areTransparentHugePagesEnabled();
+        HugePageMode mode = queryTransparentHugePagesMode();
+        bool hasTransparentHugePages = areTransparentHugePagesEnabled(mode);
+        bool useTransparentHugePages = allowTransparentHugePages && hasTransparentHugePages;
         memsize_t heapAlignment = useTransparentHugePages ? hugePageSize : HEAP_ALIGNMENT_SIZE;
+        if (mode == HugePageMode::Always)
+        {
+            //Always return memory in multiples of the huge page size - even if it is not being used
+            heapAlignment = hugePageSize;
+            OERRLOG("WARNING: The OS is configured to always use transparent huge pages.  This may cause unexplained pauses "
+                    "while transparent huge pages are coalesced. The recommended setting for "
+                    "/sys/kernel/mm/transparent_hugepage/enabled is madvise");
+        }
+
         if (heapAlignment < HEAP_ALIGNMENT_SIZE)
             heapAlignment = HEAP_ALIGNMENT_SIZE;
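
The hunk above can be read as a small decision function; the following is a hypothetical refactoring using the names from the diff, not code from the repository. When the kernel's THP mode is "always", memory is requested in huge-page multiples even if roxiemem did not opt in, because the kernel will back allocations with huge pages regardless:

    // Hypothetical helper equivalent to the inline logic above.
    memsize_t chooseHeapAlignment(HugePageMode mode, bool allowTransparentHugePages,
                                  memsize_t hugePageSize)
    {
        bool useTHP = allowTransparentHugePages && areTransparentHugePagesEnabled(mode);
        memsize_t alignment = useTHP ? hugePageSize : HEAP_ALIGNMENT_SIZE;
        if (mode == HugePageMode::Always)
            alignment = hugePageSize;   // kernel coalesces to huge pages regardless of madvise
        if (alignment < HEAP_ALIGNMENT_SIZE)
            alignment = HEAP_ALIGNMENT_SIZE;
        return alignment;
    }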
 

+ 1 - 1
system/jhtree/jhtree.cpp

@@ -279,7 +279,7 @@ void SegMonitorList::finish(unsigned keyedSize)
             if (offset == keyedSize)
             {
                 DBGLOG("SegMonitor record does not match key");  // Can happen when reading older indexes that don't save key information in metadata properly
-                keySegCount - segMonitors.length();
+                keySegCount = segMonitors.length();
                 break;
             }
             size32_t size = recInfo.getFixedOffset(idx+1) - offset;
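
The one-character fix above turns an expression statement with no effect (a subtraction whose result was discarded) into the intended assignment. This class of bug is caught at compile time when warnings are enabled, for example with g++ -Wall:

    // Compiled with g++ -Wall, the original form is diagnosed:
    void example(int length)
    {
        int keySegCount = 5;
        keySegCount - length;   // warning: value computed is not used [-Wunused-value]
        keySegCount = length;   // the intended assignment
        (void) keySegCount;
    }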

+ 14 - 3
system/jlib/jdebug.cpp

@@ -3194,21 +3194,32 @@ void PrintMemoryReport(bool full)
 #endif
 
 
-bool areTransparentHugePagesEnabled()
+bool areTransparentHugePagesEnabled(HugePageMode mode)
+{
+    return (mode != HugePageMode::Never) && (mode != HugePageMode::Unknown);
+}
+
+HugePageMode queryTransparentHugePagesMode()
 {
 #ifdef __linux__
     StringBuffer contents;
     try
     {
         contents.loadFile("/sys/kernel/mm/transparent_hugepage/enabled");
-        return !strstr(contents.str(), "[never]");
+        if (strstr(contents.str(), "[never]"))
+            return HugePageMode::Never;
+        if (strstr(contents.str(), "[madvise]"))
+            return HugePageMode::Madvise;
+        if (strstr(contents.str(), "[always]"))
+            return HugePageMode::Always;
     }
     catch (IException * e)
     {
         e->Release();
     }
+    return HugePageMode::Unknown;
 #endif
-    return false;
+    return HugePageMode::Never;
 }
 
 memsize_t getHugePageSize()
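
On Linux, /sys/kernel/mm/transparent_hugepage/enabled reads as e.g. "always [madvise] never", with the active mode in brackets, which is why the function matches the bracketed tokens. A standalone sketch of the same parse without jlib's StringBuffer (illustrative only):

    #include <fstream>
    #include <iterator>
    #include <string>

    enum class HugePageMode { Always, Madvise, Never, Unknown };

    HugePageMode queryTHPMode()
    {
        std::ifstream in("/sys/kernel/mm/transparent_hugepage/enabled");
        std::string contents((std::istreambuf_iterator<char>(in)),
                             std::istreambuf_iterator<char>());
        if (contents.find("[never]") != std::string::npos)
            return HugePageMode::Never;
        if (contents.find("[madvise]") != std::string::npos)
            return HugePageMode::Madvise;
        if (contents.find("[always]") != std::string::npos)
            return HugePageMode::Always;
        return HugePageMode::Unknown;   // file missing, unreadable, or unrecognised
    }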

+ 3 - 1
system/jlib/jdebug.hpp

@@ -333,6 +333,7 @@ unsigned jlib_decl setAllocHook(bool on);  // bwd compat returns unsigned
  #define USE_JLIB_ALLOC_HOOK
 #endif
 
+enum class HugePageMode { Always, Madvise, Never, Unknown };
 extern jlib_decl void getHardwareInfo(HardwareInfo &hdwInfo, const char *primDiskPath = NULL, const char *secDiskPath = NULL);
 extern jlib_decl void getProcessTime(UserSystemTime_t & time);
 extern jlib_decl memsize_t getMapInfo(const char *type);
@@ -345,7 +346,8 @@ extern jlib_decl void clearAffinityCache(); // should be called whenever the pro
 extern jlib_decl void printProcMap(const char *fn, bool printbody, bool printsummary, StringBuffer *lnout, MemoryBuffer *mb, bool useprintf);
 extern jlib_decl void PrintMemoryReport(bool full=true);
 extern jlib_decl void printAllocationSummary();
-extern jlib_decl bool areTransparentHugePagesEnabled();
+extern jlib_decl bool areTransparentHugePagesEnabled(HugePageMode mode);
+extern jlib_decl HugePageMode queryTransparentHugePagesMode();
 extern jlib_decl memsize_t getHugePageSize();
 
 #endif
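
The intended call sequence for the new pair of declarations is mode-first, as roxiemem's initializeHeap above does (allowTransparentHugePages here stands for the caller's configuration flag):

    HugePageMode mode = queryTransparentHugePagesMode();
    bool useTHP = allowTransparentHugePages && areTransparentHugePagesEnabled(mode);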

+ 73 - 57
thorlcr/activities/lookupjoin/thlookupjoinslave.cpp

@@ -1740,6 +1740,7 @@ protected:
     Owned<IRowWriter> overflowWriteStream;
     rowcount_t overflowWriteCount;
     OwnedMalloc<IChannelDistributor *> channelDistributors;
+    unsigned nextRhsToSpill = 0;
 
     inline bool isSmart() const { return smart; }
     inline void setFailoverToLocal()
@@ -1759,25 +1760,36 @@ protected:
     inline bool isRhsCollated() const { return rhsCollated; }
     rowidx_t clearNonLocalRows(CThorRowArrayWithFlushMarker &rows, unsigned slave)
     {
-        CThorArrayLockBlock block(rows);
         rowidx_t clearedRows = 0;
         rowidx_t committedRows = rows.numCommitted();
         ActPrintLog("clearNonLocalRows[slave=%u], numCommitted=%" RIPF "u, totalRows(inc uncommitted)=%" RIPF "u, flushMarker=%" RIPF "u", slave, committedRows, rows.queryTotalRows(), rows.flushMarker);
+
+        const void **_rows = rows.getBlock(committedRows);
         for (rowidx_t r=rows.flushMarker; r<committedRows; r++)
         {
-            unsigned hv = rightHash->hash(rows.query(r));
-            if (myNodeNum != (hv % numNodes))
+            const void *row = _rows[r];
+            if (row) // NB: rows can be null if an OOM event flushed and saved the row arrays, in which case flushMarker will have been reset
             {
-                OwnedConstThorRow row = rows.getClear(r); // dispose of
-                ++clearedRows;
+                unsigned hv = rightHash->hash(row);
+                if (myNodeNum != (hv % numNodes))
+                {
+                    ReleaseThorRow(row);
+                    _rows[r] = nullptr;
+                    ++clearedRows;
+                }
             }
         }
-        /* Record point to which clearNonLocalRows will reach
-         * so that can resume from that point, when recalled.
+        /* Record point that clearNonLocalRows reached,
+         * so that it can resume from that point when recalled.
          */
         rows.flushMarker = committedRows;
         return clearedRows;
     }
+    rowidx_t clearNonLocalRowsProtected(CThorRowArrayWithFlushMarker &rows, unsigned slave)
+    {
+        CThorArrayLockBlock block(rows);
+        return clearNonLocalRows(rows, slave);
+    }
     // Annoyingly similar to above, used post broadcast when rhsSlaveRows collated into 'rhs'
     rowidx_t clearNonLocalRows(CThorExpandingRowArray &rows)
     {
@@ -1822,7 +1834,7 @@ protected:
                 ForEachItemIn(slave, rhsSlaveRows)
                 {
                     CThorRowArrayWithFlushMarker &rows = *rhsSlaveRows.item(slave);
-                    clearedRows += clearNonLocalRows(rows, slave);
+                    clearedRows += clearNonLocalRowsProtected(rows, slave);
                 }
             }
             ActPrintLog("handleLowMem: clearedRows = %" RIPF "d", clearedRows);
@@ -1831,22 +1843,33 @@ protected:
         }
         if (spillRowArrays) // only do if have to due to memory pressure. Not via foreign node notification.
         {
-            // no non-locals left to spill, so flush a rhsSlaveRows array
-            ForEachItemIn(slave, rhsSlaveRows)
+            // NB: round-robin through the row arrays, to avoid repeatedly spilling the same arrays that have only had a few new rows added
+            unsigned startRhsToSpill = nextRhsToSpill;
+            do
             {
-                CThorRowArrayWithFlushMarker &rows = *rhsSlaveRows.item(slave);
+                unsigned curRhsToSpill = nextRhsToSpill;
+                if (++nextRhsToSpill == rhsSlaveRows.ordinality())
+                    nextRhsToSpill = 0;
+
+                CThorRowArrayWithFlushMarker &rows = *rhsSlaveRows.item(curRhsToSpill);
                 if (rows.numCommitted())
                 {
-                    clearNonLocalRows(rows, slave);
-                    rows.flushMarker = 0; // reset marker, since save will cause numCommitted to shrink
-                    VStringBuffer tempPrefix("spill_%d", container.queryId());
-                    StringBuffer tempName;
-                    GetTempName(tempName, tempPrefix.str(), true);
-                    Owned<CFileOwner> file = new CFileOwner(createIFile(tempName.str()));
-                    VStringBuffer spillPrefixStr("clearAllNonLocalRows(%d)", SPILL_PRIORITY_SPILLABLE_STREAM);
-
-                    // 3rd param. is skipNulls = true, the row arrays may have had the non-local rows delete already.
-                    rows.save(file->queryIFile(), spillCompInfo, true, spillPrefixStr.str()); // saves committed rows
+                    Owned<CFileOwner> file;
+                    {
+                        // NB: rows may still be added to the row arrays, so protect array whilst saving
+                        CThorArrayLockBlock block(rows);
+                        if (rows.numCommitted() == clearNonLocalRows(rows, curRhsToSpill)) // clears out any stragglers that were added whilst the initial failover to local was happening
+                            continue; // all rows were cleared, no local rows remain, so nothing to save. Skip to next row set
+
+                        VStringBuffer tempPrefix("spill_%d", container.queryId());
+                        StringBuffer tempName;
+                        GetTempName(tempName, tempPrefix.str(), true);
+                        file.setown(new CFileOwner(createIFile(tempName.str())));
+                        VStringBuffer spillPrefixStr("clearAllNonLocalRows(%d)", SPILL_PRIORITY_SPILLABLE_STREAM);
+                        // 3rd param. is skipNulls = true, the row arrays may have had the non-local rows deleted already.
+                        rows.save(file->queryIFile(), spillCompInfo, true, spillPrefixStr.str()); // saves committed rows
+                        rows.flushMarker = 0; // reset because array will be moved as a consequence of further adds, so next scan must be from start
+                    }
 
                     unsigned rwFlags = DEFAULT_RWFLAGS;
                     if (spillCompInfo)
@@ -1858,6 +1881,7 @@ protected:
                     return true;
                 }
             }
+            while (nextRhsToSpill != startRhsToSpill);
         }
         return false;
     }
@@ -1921,20 +1945,7 @@ protected:
         }
         else
         {
-            /* NB: If cleared before rhsCollated, then need to clear non-locals that were added after spill
-             * There should not be many, as broadcast starts to stop as soon as a slave notifies it is spilling
-             * and ignores all non-locals.
-             */
-
-            rhsSlaveRows.sort(sortBySize); // because want biggest compacted/consumed 1st
-            ForEachItemIn(slave, rhsSlaveRows)
-            {
-                CThorRowArrayWithFlushMarker &rows = *rhsSlaveRows.item(slave);
-                clearNonLocalRows(rows, slave);
-
-                ActPrintLog("Compacting rhsSlaveRows[%u], has %" RIPF "u rows", slave, rows.numCommitted());
-                rows.compact();
-            }
+            rhsSlaveRows.sort(sortBySize); // because want biggest consumed 1st
 
             // NB: Some streams may have already been added to gatheredRHSNodeStreams, as a result of previous spilling
             for (unsigned a=0; a<rhsSlaveRows.ordinality(); a++)
@@ -2022,15 +2033,11 @@ protected:
             doBroadcastRHS(stopping);
 
             rowidx_t rhsRows = 0;
-            bool globalBroadcastSpilt = false;
             {
                 CriticalBlock b(broadcastSpillingLock);
                 rhsRows = getGlobalRHSTotal(); // flushes all rhsSlaveRows arrays to calculate total.
                 if (hasFailedOverToLocal())
-                {
                     overflowWriteStream.clear(); // broadcast has finished, no more can be written
-                    globalBroadcastSpilt = true;
-                }
             }
             if (!hasFailedOverToLocal())
             {
@@ -2111,17 +2118,35 @@ protected:
 
                 rightRowManager->removeRowBuffer(this);
 
-                if (!globalBroadcastSpilt && hasFailedOverToLocal()) // i.e. global broadcast didn't spill, but has since
+                ActPrintLog("Broadcasting final spilt status: %s", hasFailedOverToLocal() ? "spilt" : "did not spill");
+                // NB: Will cause other slaves to flush non-local if any have and failedOverToLocal will be set on all
+                doBroadcastStop(broadcast2MpTag, hasFailedOverToLocal() ? bcastflag_spilt : bcastflag_null);
+
+                if (hasFailedOverToLocal())
                 {
+                    // If HT sized already and now spilt, it's too big. Clear for re-use by handleLocalRHS()
+                    clearHT();
+                    marker.reset();
+                }
+
+                if (!rhsCollated) // NB: could have spilt after collated
+                {
+                    // Can now clean/prepare remaining row arrays for next stages
                     ForEachItemIn(a, rhsSlaveRows)
                     {
-                        CThorSpillableRowArray &rows = *rhsSlaveRows.item(a);
-                        rows.flush();
+                        CThorRowArrayWithFlushMarker &rows = *rhsSlaveRows.item(a);
+                        rows.flush(true); // If the row array was spilt, force to relocate so it can be compacted
+
+                        /* NB: need to clear non-locals that were added after spill
+                         * There should not be many, as broadcast starts to stop as soon as a slave notifies it is spilling
+                         * and ignores all non-locals.
+                         */
+                        clearNonLocalRows(rows, a);
+
+                        ActPrintLog("Compacting rhsSlaveRows[%u], has %" RIPF "u rows", a, rows.numCommitted());
+                        rows.compact();
                     }
                 }
-                ActPrintLog("Broadcasting final spilt status: %s", hasFailedOverToLocal() ? "spilt" : "did not spill");
-                // NB: Will cause other slaves to flush non-local if any have and failedOverToLocal will be set on all
-                doBroadcastStop(broadcast2MpTag, hasFailedOverToLocal() ? bcastflag_spilt : bcastflag_null);
             }
             InterChannelBarrier();
             ActPrintLog("Shared memory manager memory report");
@@ -2212,7 +2237,7 @@ protected:
                 atomic_set(&spilt, 0);
                 //NB: all channels will have done this, before rows are added
             }
-#define HPCC_17331 // Whilst under investigation
+#define HPCC_17331 // Whilst under investigation. Should be solved by fix for HPCC-21091
             void process(IRowStream *right)
             {
 #ifdef HPCC_17331
@@ -2473,13 +2498,6 @@ protected:
                     if (grouped)
                         throw MakeActivityException(this, 0, "Degraded to Distributed Local Lookup, but input is marked as grouped and cannot preserve LHS order");
 
-                    if (0 == queryJobChannelNumber())
-                    {
-                        // If HT sized already and now spilt, it's too big. Clear for re-use by handleLocalRHS()
-                        clearHT();
-                        marker.reset();
-                    }
-
                     ICompare *cmp = rhsCollated ? NULL : compareRight; // if rhsCollated=true, then sorted, otherwise can't rely on any previous order.
                     rightCollector.setown(handleFailoverToLocalRHS(cmp));
                     if (rightCollector->hasSpilt())
@@ -2707,6 +2725,7 @@ public:
                 }
             }
             failedOverToStandard = false;
+            nextRhsToSpill = 0;
         }
     }
     CATCH_NEXTROW()
@@ -2822,7 +2841,7 @@ public:
         rows.clearRows();
         return localRows.ordinality();
     }
-    virtual bool addRHSRows(CThorSpillableRowArray &rhsRows, CThorExpandingRowArray &inRows, CThorExpandingRowArray &rhsInRowsTemp)
+    virtual bool addRHSRows(CThorSpillableRowArray &rhsRows, CThorExpandingRowArray &inRows, CThorExpandingRowArray &rhsInRowsTemp) override
     {
         dbgassertex(0 == rhsInRowsTemp.ordinality());
         if (hasFailedOverToLocal())
@@ -2831,9 +2850,6 @@ public:
                 return true;
         }
         CriticalBlock b(rhsRowLock);
-        /* NB: If PARENT::addRHSRows fails, it will cause clearAllNonLocalRows() to have been triggered and failedOverToLocal to be set
-         * When all is done, a last pass is needed to clear out non-locals
-         */
         if (overflowWriteFile)
         {
             /* Tried to do outside crit above, but if empty, and now overflow, need to inside
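
The new nextRhsToSpill member drives a round-robin over the per-slave row arrays, so that successive low-memory callbacks cycle through all arrays instead of repeatedly spilling the one that most recently received rows. A self-contained sketch of the loop shape; RowArray and trySpill are stand-ins for CThorRowArrayWithFlushMarker and the save-to-temp-file path, not the real types:

    #include <vector>

    struct RowArray { bool hasRows = false; };   // stand-in for CThorRowArrayWithFlushMarker

    static bool trySpill(RowArray &rows)         // stand-in for the save() path above
    {
        if (!rows.hasRows)
            return false;
        rows.hasRows = false;                    // pretend the rows were written to disk
        return true;
    }

    // Visit each array at most once per call, remembering where to resume,
    // so repeated callbacks spread the spilling across all arrays.
    bool spillOneRoundRobin(std::vector<RowArray> &arrays, unsigned &nextToSpill)
    {
        if (arrays.empty())
            return false;
        unsigned start = nextToSpill;
        do
        {
            unsigned cur = nextToSpill;
            if (++nextToSpill == arrays.size())
                nextToSpill = 0;
            if (trySpill(arrays[cur]))
                return true;
        }
        while (nextToSpill != start);
        return false;                            // nothing left to spill
    }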

+ 4 - 4
thorlcr/thorutil/thmem.cpp

@@ -1455,8 +1455,8 @@ rowidx_t CThorSpillableRowArray::save(IFile &iFile, unsigned _spillCompInfo, boo
     firstRow += n;
     offset_t bytesWritten = writer->getPosition();
     writer.clear();
-    ActPrintLog(&activity, "%s: CThorSpillableRowArray::save done, rows written = %" RIPF "u, bytes = %" I64F "u", _tracingPrefix, rowsWritten, (__int64)bytesWritten);
-    return n;
+    ActPrintLog(&activity, "%s: CThorSpillableRowArray::save done, rows written = %" RIPF "u, bytes = %" I64F "u, firstRow = %u", _tracingPrefix, rowsWritten, (__int64)bytesWritten, firstRow);
+    return rowsWritten;
 }
 
 
@@ -1502,10 +1502,10 @@ bool CThorSpillableRowArray::shrink()
     return maxRows != prevMaxRows;
 }
 
-bool CThorSpillableRowArray::flush()
+bool CThorSpillableRowArray::flush(bool force)
 {
     CThorArrayLockBlock block(*this);
-    return _flush(false);
+    return _flush(force);
 }
 
 bool CThorSpillableRowArray::appendRows(CThorExpandingRowArray &inRows, bool takeOwnership)
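
The changed return value matters because save() can be called with skipNulls=true: the number of array slots consumed (n) and the number of rows actually written diverge once slots have been nulled by clearNonLocalRows. A toy illustration (the row pointers are fake):

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        // Two live rows, two slots nulled by an earlier clear pass.
        const void *rows[] = { "rowA", nullptr, "rowB", nullptr };
        size_t n = sizeof(rows) / sizeof(rows[0]), rowsWritten = 0;
        for (size_t i = 0; i < n; i++)
            if (rows[i])
                rowsWritten++;   // skipNulls: only live rows reach the writer
        printf("consumed %zu slots, wrote %zu rows\n", n, rowsWritten);   // 4 vs 2
        return 0;
    }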

+ 2 - 2
thorlcr/thorutil/thmem.hpp

@@ -438,7 +438,7 @@ public:
     roxiemem::IRowManager *queryRowManager() const { return CThorExpandingRowArray::queryRowManager(); }
     void kill();
     void compact();
-    bool flush();
+    bool flush(bool force=false);
     bool shrink();
     inline bool isFlushed() const { return numRows == numCommitted(); }
     inline bool append(const void *row) __attribute__((warn_unused_result))
@@ -515,6 +515,7 @@ public:
     bool shrink(rowidx_t requiredRows);
     void transferRowsCopy(const void **outRows, bool takeOwnership);
     void readBlock(const void **outRows, rowidx_t readRows);
+    const void **getBlock(rowidx_t readRows);
 
     virtual IThorArrayLock &queryLock() { return *this; }
 // IThorArrayLock
@@ -523,7 +524,6 @@ public:
 
 private:
     void clearRows();
-    const void **getBlock(rowidx_t readRows);
 };
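
Promoting getBlock() from private to public supports the pattern the lookup-join change relies on: the caller takes the array lock for the whole scan, then edits the committed row slots in place. A sketch using the Thor types from this diff; shouldDrop is a hypothetical predicate standing in for the hash test in clearNonLocalRows:

    void dropMatchingRows(CThorSpillableRowArray &rows)
    {
        CThorArrayLockBlock block(rows);                 // lock held across the whole scan
        rowidx_t committed = rows.numCommitted();
        const void **rawRows = rows.getBlock(committed); // raw view of the committed rows
        for (rowidx_t r = 0; r < committed; r++)
        {
            if (rawRows[r] && shouldDrop(rawRows[r]))    // shouldDrop: hypothetical
            {
                ReleaseThorRow(rawRows[r]);              // free the row...
                rawRows[r] = nullptr;                    // ...leaving a null slot, as clearNonLocalRows does
            }
        }
    }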