Browse Source

HPCC-13663 Refactor stable quicksort to return sorted rows in place

Previously the function returned an sorted array of pointers to rows
which is an unusual interface and is harder to optimize with another
function.
Also remove the sort with two compare functions which was unused.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 10 years ago
parent
commit
fc4659e14b
6 changed files with 56 additions and 180 deletions
  1. 11 50
      ecl/hthor/hthor.cpp
  2. 0 2
      ecl/hthor/hthor.ipp
  3. 16 33
      roxie/ccd/ccdserver.cpp
  4. 26 84
      system/jlib/jsort.cpp
  5. 2 3
      system/jlib/jsort.hpp
  6. 1 8
      thorlcr/thorutil/thmem.cpp

+ 11 - 50
ecl/hthor/hthor.cpp

@@ -2528,27 +2528,28 @@ bool CHThorGroupDedupAllActivity::calcNextDedupAll()
     if (primaryCompare)
     {
         //hard, if not impossible, to hit this code once optimisations in place
-        MemoryAttr indexbuff(max*sizeof(void **));
-        void *** index = (void ***)indexbuff.bufferBase();
-        qsortvecstable((void * *)group.getArray(), max, *primaryCompare, index);
+        MemoryAttr indexbuff(max*sizeof(void *));
+        void ** temp = (void **)indexbuff.bufferBase();
+        void ** rows = (void * *)group.getArray();
+        qsortvecstableinplace(rows, max, *primaryCompare, temp);
         unsigned first = 0;
         for (unsigned idx = 1; idx < max; idx++)
         {
-            if (primaryCompare->docompare(*(index[first]), *(index[idx])) != 0)
+            if (primaryCompare->docompare(rows[first], rows[idx]) != 0)
             {
-                dedupRangeIndirect(first, idx, index);
+                dedupRange(first, idx, group);
                 first = idx;
             }
         }
-        dedupRangeIndirect(first, max, index);
+        dedupRange(first, max, group);
 
         for(unsigned idx2=0; idx2<max; ++idx2)
         {
-            void * * cur = index[idx2];
+            void * cur = rows[idx2];
             if(cur)
             {
-                LinkRoxieRow(*cur);
-                survivors.append(*cur);
+                LinkRoxieRow(cur);
+                survivors.append(cur);
             }
         }
     }
@@ -2596,34 +2597,6 @@ void CHThorGroupDedupAllActivity::dedupRange(unsigned first, unsigned last, Owne
     }
 }
 
-void CHThorGroupDedupAllActivity::dedupRangeIndirect(unsigned first, unsigned last, void *** index)
-{
-    for (unsigned idxL = first; idxL < last; idxL++)
-    {
-        if (index[idxL])
-        {
-            for (unsigned idxR = first; idxR < last; idxR++)
-            {
-                if ((idxL != idxR) && index[idxR])
-                {
-                    if (helper.matches(*(index[idxL]), *(index[idxR])))
-                    {
-                        if (keepLeft)
-                        {
-                            index[idxR] = NULL;
-                        }
-                        else
-                        {
-                            index[idxL] = NULL;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
 const void *CHThorGroupDedupAllActivity::nextInGroup()
 {
     if (!firstDone)
@@ -4019,23 +3992,11 @@ void CStableQuickSorter::performSort()
     if (numRows)
     {
         const void * * rows = rowsToSort.getBlock(numRows);
-        qsortvecstable((void * *)rows, numRows, *compare, index);
+        qsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
         finger = 0;
     }
 }
 
-const void * CStableQuickSorter::getNextSorted()
-{
-    if(finger < rowsToSort.numCommitted())
-    {
-        const void * row = *(index[finger]);
-        *(index[finger++]) = NULL;//Clear the entry in rowsToSort so it wont get double-released
-        return row;
-    }
-    else
-        return NULL;
-}
-
 void CStableQuickSorter::killSorted()
 {
     CSimpleSorterBase::killSorted();

+ 0 - 2
ecl/hthor/hthor.ipp

@@ -509,7 +509,6 @@ public:
 private:
     bool calcNextDedupAll();
     void dedupRange(unsigned first, unsigned last, OwnedRowArray & group);
-    void dedupRangeIndirect(unsigned first, unsigned last, void *** index);
 
 private:
     IHThorDedupArg &helper;
@@ -1134,7 +1133,6 @@ public:
     virtual bool addRow(const void * next);
     virtual void spillSortedToDisk(IDiskMerger * merger);
     virtual void performSort();
-    virtual const void * getNextSorted();
     virtual void killSorted();
 
 private:

+ 16 - 33
roxie/ccd/ccdserver.cpp

@@ -6684,28 +6684,28 @@ public:
         }
     }
 
-    void dedupRangeIndirect(unsigned first, unsigned last, void *** index)
+    void dedupRange(unsigned first, unsigned last, void ** rows)
     {
         for (unsigned idxL = first; idxL < last; idxL++)
         {
-            void * left = *(index[idxL]);
+            void * left = rows[idxL];
             if (left)
             {
                 for (unsigned idxR = first; idxR < last; idxR++)
                 {
-                    void * right = *(index[idxR]);
+                    void * right = rows[idxR];
                     if ((idxL != idxR) && right)
                     {
                         if (helper.matches(left, right))
                         {
                             if (keepLeft)
                             {
-                                *(index[idxR]) = NULL;
+                                rows[idxR] = NULL;
                                 ReleaseRoxieRow(right);
                             }
                             else
                             {
-                                *(index[idxL]) = NULL;
+                                rows[idxL] = NULL;
                                 ReleaseRoxieRow(left);
                                 break;
                             }
@@ -6732,23 +6732,24 @@ public:
         if (primaryCompare)
         {
             //hard, if not impossible, to hit this code once optimisations in place
-            MemoryAttr indexbuff(max*sizeof(void **));
-            void *** index = (void ***)indexbuff.bufferBase();
-            qsortvecstable(const_cast<void * *>(group.getArray()), max, *primaryCompare, index);
+            MemoryAttr indexbuff(max*sizeof(void *));
+            void ** temp = (void **)indexbuff.bufferBase();
+            void * *rows = const_cast<void * *>(group.getArray());
+            qsortvecstableinplace(rows, max, *primaryCompare, temp);
             unsigned first = 0;
             for (unsigned idx = 1; idx < max; idx++)
             {
-                if (primaryCompare->docompare(*(index[first]), *(index[idx])) != 0)
+                if (primaryCompare->docompare(rows[first], rows[idx]) != 0)
                 {
-                    dedupRangeIndirect(first, idx, index);
+                    dedupRange(first, idx, rows);
                     first = idx;
                 }
             }
-            dedupRangeIndirect(first, max, index);
+            dedupRange(first, max, rows);
 
             for(unsigned idx2=0; idx2<max; ++idx2)
             {
-                void * cur = *(index[idx2]);
+                void * cur = rows[idx2];
                 if(cur)
                     survivors.append(cur);
             }
@@ -7458,13 +7459,7 @@ public:
             void **rows = const_cast<void * *>(sorted.getArray());
             MemoryAttr tempAttr(numRows*sizeof(void **)); // Temp storage for stable sort. This should probably be allocated from roxiemem
             void **temp = (void **) tempAttr.bufferBase();
-            memcpy(temp, rows, numRows*sizeof(void **));
-            qsortvecstable(temp, numRows, *compare, (void ***)rows);
-            for (unsigned i = 0; i < numRows; i++)
-            {
-                *rows = **((void ***)rows);
-                rows++;
-            }
+            qsortvecstableinplace(rows, numRows, *compare, temp);
         }
     }
 };
@@ -7551,13 +7546,7 @@ public:
                 {
                     MemoryAttr tempAttr(numRows*sizeof(void **)); // Temp storage for stable sort. This should probably be allocated from roxiemem
                     void **temp = (void **) tempAttr.bufferBase();
-                    memcpy(temp, rows, numRows*sizeof(void **));
-                    qsortvecstable(temp, numRows, *compare, (void ***)rows);
-                    for (unsigned i = 0; i < numRows; i++)
-                    {
-                        *rows = **((void ***)rows);
-                        rows++;
-                    }
+                    qsortvecstableinplace(rows, numRows, *compare, temp);
                 }
                 else
                     qsortvec(rows, numRows, *compare);
@@ -17771,13 +17760,7 @@ public:
                             MemoryAttr tempAttr(rightord*sizeof(void **)); // Temp storage for stable sort. This should probably be allocated from roxiemem
                             void **temp = (void **) tempAttr.bufferBase();
                             void **_rows = const_cast<void * *>(rightset.getArray());
-                            memcpy(temp, _rows, rightord*sizeof(void **));
-                            qsortvecstable(temp, rightord, *helper.queryCompareRight(), (void ***)_rows);
-                            for (unsigned i = 0; i < rightord; i++)
-                            {
-                                *_rows = **((void ***)_rows);
-                                _rows++;
-                            }
+                            qsortvecstableinplace(_rows, rightord, *helper.queryCompareRight(), temp);
                         }
                     }
                     table.setown(new ManyLookupTable(rightset, helper));  // NOTE - takes ownership of rightset

+ 26 - 84
system/jlib/jsort.cpp

@@ -505,88 +505,6 @@ void parqsortvec(void **a, size32_t n, const ICompare & compare, unsigned numcpu
 
 //---------------------------------------------------------------------------
 
-#define CMP(a,b)         cmpic2(a,b,compare1,compare2)
-#define MED3(a,b,c)      med3ic2(a,b,c,compare1,compare2)
-#define RECURSE(a,b)     qsortvec(a, b, compare1, compare2)
-static inline int cmpic2(VECTOR a, VECTOR b, const ICompare & compare1, const ICompare & compare2)
-{
-    int ret = compare1.docompare(*(a),*(b));
-    if (ret==0)
-        ret = compare2.docompare(*(a),*(b));
-    return ret;
-}
-static inline VECTOR med3ic2(VECTOR a, VECTOR b, VECTOR c, const ICompare & compare1, const ICompare & compare2)
-{
-  return CMP(a, b) < 0 ?
-      (CMP(b, c) < 0 ? b : (CMP(a, c) < 0 ? c : a ))
-    : (CMP(b, c) > 0 ? b : (CMP(a, c) < 0 ? a : c ));
-}
-
-
-void qsortvec(void **a, size32_t n, const ICompare & compare1, const ICompare & compare2)
-#include "jsort2.inc"
-
-
-class cParQSort2: public cParQSortBase
-{
-    VECTOR array;
-    const ICompare &compare1;
-    const ICompare &compare2;
-
-    void partition(unsigned s, unsigned n, unsigned &r1, unsigned &r2) // NB s, r1 and r2 are relative to array
-    {
-        DOPARTITION
-    }
-
-    void serialsort(unsigned from, unsigned len)
-    {
-        qsortvec(array+from,len,compare1,compare2);
-    }
-
-public:
-
-    cParQSort2(VECTOR _a,const ICompare &_compare1,const ICompare &_compare2, unsigned _numsubthreads)
-        : cParQSortBase(_numsubthreads),compare1(_compare1), compare2(_compare2)
-    {
-        array = _a;
-        cParQSortBase::start();
-    }
-
-};
-
-
-void parqsortvec(void **a, size32_t n, const ICompare & compare1, const ICompare & compare2,unsigned numcpus)
-{
-    if ((n<=PARALLEL_GRANULARITY)||!sortParallel(numcpus)) {
-        qsortvec(a,n,compare1,compare2);
-        return;
-    }
-    cParQSort2 sorter(a,compare1,compare2,numcpus-1);
-    sorter.subsort(0,n);
-    sorter.join();
-
-#ifdef TESTPARSORT
-    for (unsigned i=1;i<n;i++) {
-        int cmp = compare1.docompare(a[i-1],a[i]);
-        if (cmp>0)
-            ERRLOG("parqsortvec(2,1) failed %d",i);
-        else if ((cmp==0)&&(compare2.docompare(a[i-1],a[i])>0))
-            ERRLOG("parqsortvec(2,2) failed %d",i);
-    }
-#endif
-
-
-}
-
-
-
-
-#undef CMP
-#undef MED3
-#undef RECURSE
-
-//---------------------------------------------------------------------------
-
 #undef VECTOR
 #undef SWAP
 typedef void *** _IVECTOR;
@@ -657,14 +575,26 @@ public:
 #undef RECURSE
 #undef VECTOR
 
-void qsortvecstable(void ** const rows, size32_t n, const ICompare & compare, void *** index)
+static void qsortvecstable(void ** const rows, size32_t n, const ICompare & compare, void *** index)
 {
     for(unsigned i=0; i<n; ++i)
         index[i] = rows+i;
     doqsortvecstable(index, n, compare);
 }
 
-void parqsortvecstable(void ** const rows, size32_t n, const ICompare & compare, void *** index, unsigned numcpus)
+void qsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare, void ** temp)
+{
+    memcpy(temp, rows, n * sizeof(void*));
+
+    qsortvecstable(temp, n, compare, (void * * *)rows);
+
+    //I'm sure this violates the aliasing rules...
+    void * * * rowsAsIndex = (void * * *)rows;
+    for(unsigned i=0; i<n; ++i)
+        rows[i] = *rowsAsIndex[i];
+}
+
+static void parqsortvecstable(void ** rows, size32_t n, const ICompare & compare, void *** index, unsigned numcpus)
 {
     for(unsigned i=0; i<n; ++i)
         index[i] = rows+i;
@@ -678,6 +608,18 @@ void parqsortvecstable(void ** const rows, size32_t n, const ICompare & compare,
 }
 
 
+void parqsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare, void ** temp, unsigned numcpus)
+{
+    memcpy(temp, rows, n * sizeof(void*));
+
+    parqsortvecstable(temp, n, compare, (void * * *)rows, numcpus);
+
+    //I'm sure this violates the aliasing rules...
+    void * * * rowsAsIndex = (void * * *)rows;
+    for(unsigned i=0; i<n; ++i)
+        rows[i] = *rowsAsIndex[i];
+}
+
 
 //=========================================================================
 

+ 2 - 3
system/jlib/jsort.hpp

@@ -75,12 +75,11 @@ extern jlib_decl void qsortvec(void **a, size32_t n, const ICompare & compare);
 extern jlib_decl void qsortvec(void **a, size32_t n, const ICompare & compare1, const ICompare & compare2);
 
 // Call with n rows of data in rows, index an (uninitialized) array of size n. The function will fill index with a stably sorted index into rows.
-extern jlib_decl void qsortvecstable(void ** const rows, size32_t n, const ICompare & compare, void *** index);
+extern jlib_decl void qsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare, void ** temp);
 
 
 extern jlib_decl void parqsortvec(void **a, size32_t n, const ICompare & compare, unsigned ncpus=0); // runs in parallel on multi-core
-extern jlib_decl void parqsortvec(void **a, size32_t n, const ICompare & compare1, const ICompare & compare2,unsigned ncpus=0);
-extern jlib_decl void parqsortvecstable(void ** const rows, size32_t n, const ICompare & compare, void *** index, unsigned ncpus=0); // runs in parallel on multi-core
+extern jlib_decl void parqsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare, void ** temp, unsigned ncpus=0); // runs in parallel on multi-core
 
 
 // we define the heap property that no element c should be smaller than its parent (unsigned)(c-1)/2

+ 1 - 8
thorlcr/thorutil/thmem.cpp

@@ -513,14 +513,7 @@ void CThorExpandingRowArray::doSort(rowidx_t n, void **const rows, ICompare &com
             dbgassertex(NULL != stableTable);
             stableTablePtr = stableTable;
         }
-        void **_rows = rows;
-        memcpy(stableTablePtr, _rows, n*sizeof(void **));
-        parqsortvecstable(stableTablePtr, n, compare, (void ***)_rows, maxCores);
-        while (n--)
-        {
-            *_rows = **((void ***)_rows);
-            _rows++;
-        }
+        parqsortvecstableinplace(rows, n, compare, stableTablePtr, maxCores);
     }
     else
         parqsortvec((void **const)rows, n, compare, maxCores);