|
@@ -26,10 +26,12 @@
|
|
|
#include "jthread.hpp"
|
|
|
#include "jqueue.tpp"
|
|
|
#include "jset.hpp"
|
|
|
+#include "jutil.hpp"
|
|
|
|
|
|
#ifdef _USE_TBB
|
|
|
#include "tbb/task.h"
|
|
|
#include "tbb/task_scheduler_init.h"
|
|
|
+#include "tbb/parallel_sort.h"
|
|
|
#endif
|
|
|
|
|
|
#ifdef _DEBUG
|
|
@@ -39,7 +41,10 @@
|
|
|
//#define MCMERGESTATS
|
|
|
#endif
|
|
|
|
|
|
+//#define TRACE_PARTITION
|
|
|
+
|
|
|
#define PARALLEL_GRANULARITY 1024
|
|
|
+static const unsigned numPartitionSamples = 3;
|
|
|
|
|
|
static bool sortParallel(unsigned &numcpus)
|
|
|
{
|
|
@@ -241,6 +246,69 @@ void qsortvec(void **a, size32_t n, sortCompareFunction compare)
|
|
|
#undef RECURSE
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
|
+// tbb versions of the quick sort to provide a useful base comparison
|
|
|
+
|
|
|
+class TbbCompareWrapper
|
|
|
+{
|
|
|
+public:
|
|
|
+ TbbCompareWrapper(const ICompare & _compare) : compare(_compare) {}
|
|
|
+ bool operator()(void * const & l, void * const & r) const { return compare.docompare(l, r) < 0; }
|
|
|
+ const ICompare & compare;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+class TbbCompareIndirectWrapper
|
|
|
+{
|
|
|
+public:
|
|
|
+ TbbCompareIndirectWrapper(const ICompare & _compare) : compare(_compare) {}
|
|
|
+ bool operator()(void * * const & l, void * * const & r) const
|
|
|
+ {
|
|
|
+ int ret = compare.docompare(*l,*r);
|
|
|
+ if (ret==0)
|
|
|
+ {
|
|
|
+ if (l < r)
|
|
|
+ return true;
|
|
|
+ else
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ return (ret < 0);
|
|
|
+ }
|
|
|
+ const ICompare & compare;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+void tbbqsortvec(void **a, size_t n, const ICompare & compare)
|
|
|
+{
|
|
|
+#ifdef _USE_TBB
|
|
|
+ TbbCompareWrapper tbbcompare(compare);
|
|
|
+ tbb::parallel_sort(a, a+n, tbbcompare);
|
|
|
+#else
|
|
|
+ throwUnexpectedX("TBB quicksort not available");
|
|
|
+#endif
|
|
|
+}
|
|
|
+
|
|
|
+void tbbqsortstable(void ** rows, size_t n, const ICompare & compare, void ** temp)
|
|
|
+{
|
|
|
+#ifdef _USE_TBB
|
|
|
+ void * * * rowsAsIndex = (void * * *)rows;
|
|
|
+ memcpy(temp, rows, n * sizeof(void*));
|
|
|
+
|
|
|
+ for(unsigned i=0; i<n; ++i)
|
|
|
+ rowsAsIndex[i] = temp+i;
|
|
|
+
|
|
|
+ TbbCompareIndirectWrapper tbbcompare(compare);
|
|
|
+ tbb::parallel_sort(rowsAsIndex, rowsAsIndex+n, tbbcompare);
|
|
|
+
|
|
|
+ //I'm sure this violates the aliasing rules...
|
|
|
+ for(unsigned i=0; i<n; ++i)
|
|
|
+ rows[i] = *rowsAsIndex[i];
|
|
|
+#else
|
|
|
+ throwUnexpectedX("TBB quicksort not available");
|
|
|
+#endif
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+//---------------------------------------------------------------------------
|
|
|
|
|
|
#define CMP(a,b) (compare.docompare(*(a),*(b)))
|
|
|
#define MED3(a,b,c) med3ic(a,b,c,compare)
|
|
@@ -621,14 +689,14 @@ void parqsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare
|
|
|
|
|
|
//I'm sure this violates the aliasing rules...
|
|
|
void * * * rowsAsIndex = (void * * *)rows;
|
|
|
- for(unsigned i=0; i<n; ++i)
|
|
|
+ for(size32_t i=0; i<n; ++i)
|
|
|
rows[i] = *rowsAsIndex[i];
|
|
|
}
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------------------------------------------------------
|
|
|
|
|
|
-inline void * * mergePartitions(const ICompare & compare, void * * result, unsigned n1, void * * ret1, unsigned n2, void * * ret2)
|
|
|
+inline void * * mergePartitions(const ICompare & compare, void * * result, size_t n1, void * * ret1, size_t n2, void * * ret2)
|
|
|
{
|
|
|
void * * tgt = result;
|
|
|
loop
|
|
@@ -695,6 +763,13 @@ inline void * * mergePartitions(const ICompare & compare, void * * result, size_
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
// Copy n row pointers from src into result, lowest index first.
inline void clonePartition(void * * result, size_t n, void * * src)
{
    for (size_t idx = 0; idx < n; ++idx)
        result[idx] = src[idx];
}
|
|
|
+
|
|
|
inline void * * mergePartitionsRev(const ICompare & compare, void * * result, size_t n1, void * * ret1, size_t n2, void * * ret2, size_t n)
|
|
|
{
|
|
|
void * * tgt = result+n1+n2-1;
|
|
@@ -731,7 +806,7 @@ inline void * * mergePartitionsRev(const ICompare & compare, void * * result, si
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
-static void * * mergeSort(void ** rows, size32_t n, const ICompare & compare, void ** tmp, unsigned depth)
|
|
|
+static void * * mergeSort(void ** rows, size_t n, const ICompare & compare, void ** tmp, unsigned depth)
|
|
|
{
|
|
|
void * * result = (depth & 1) ? tmp : rows;
|
|
|
//This could be coded to perform an "optimal" 3 element compare, but the following code is much simpler,
|
|
@@ -762,8 +837,8 @@ static void * * mergeSort(void ** rows, size32_t n, const ICompare & compare, vo
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- unsigned n1 = (n+1)/2;
|
|
|
- unsigned n2 = n - n1;
|
|
|
+ size_t n1 = (n+1)/2;
|
|
|
+ size_t n2 = n - n1;
|
|
|
void * * ret1 = mergeSort(rows, n1, compare, tmp, depth+1);
|
|
|
void * * ret2 = mergeSort(rows+n1, n2, compare, tmp + n1, depth+1);
|
|
|
dbgassertex(ret2 == ret1 + n1);
|
|
@@ -772,7 +847,7 @@ static void * * mergeSort(void ** rows, size32_t n, const ICompare & compare, vo
|
|
|
}
|
|
|
|
|
|
|
|
|
-void msortvecstableinplace(void ** rows, size32_t n, const ICompare & compare, void ** temp)
|
|
|
+void msortvecstableinplace(void ** rows, size_t n, const ICompare & compare, void ** temp)
|
|
|
{
|
|
|
if (n <= 1)
|
|
|
return;
|
|
@@ -812,7 +887,7 @@ class TbbParallelMergeSorter
|
|
|
class BisectTask : public tbb::task
|
|
|
{
|
|
|
public:
|
|
|
- BisectTask(TbbParallelMergeSorter & _sorter, void ** _rows, size32_t _n, void ** _temp, unsigned _depth, task * _next)
|
|
|
+ BisectTask(TbbParallelMergeSorter & _sorter, void ** _rows, size_t _n, void ** _temp, unsigned _depth, task * _next)
|
|
|
: sorter(_sorter), rows(_rows), n(_n), temp(_temp), depth(_depth), next(_next)
|
|
|
{
|
|
|
}
|
|
@@ -831,16 +906,34 @@ class TbbParallelMergeSorter
|
|
|
|
|
|
void * * result = (depth & 1) ? temp : rows;
|
|
|
void * * src = (depth & 1) ? rows : temp;
|
|
|
- unsigned n1 = (n+1)/2;
|
|
|
- unsigned n2 = n-n1;
|
|
|
+ size_t n1 = (n+1)/2;
|
|
|
+ size_t n2 = n-n1;
|
|
|
task * mergeTask;
|
|
|
if (depth < sorter.parallelMergeDepth)
|
|
|
{
|
|
|
- task * mergeFwdTask = new (allocate_additional_child_of(*next)) MergeTask(sorter.compare, result, n1, src, n2, src+n1, n1);
|
|
|
- mergeFwdTask->set_ref_count(1);
|
|
|
- task * mergeRevTask = new (next->allocate_child()) MergeRevTask(sorter.compare, result, n1, src, n2, src+n1, n2);
|
|
|
- mergeRevTask->set_ref_count(1);
|
|
|
- mergeTask = new (allocate_root()) SplitTask(mergeFwdTask, mergeRevTask);
|
|
|
+ unsigned partitions = sorter.numPartitionCores() >> depth;
|
|
|
+ if (partitions > 1)
|
|
|
+ {
|
|
|
+ PartitionSplitTask * splitTask = new (allocate_root()) PartitionSplitTask(n1, src, n2, src+n1, partitions, sorter.compare);
|
|
|
+ for (unsigned i=0; i < partitions; i++)
|
|
|
+ {
|
|
|
+ MergeTask * mergeFwdTask = new (allocate_additional_child_of(*next)) MergeTask(sorter.compare, result, n1, src, n2, src+n1, 0);
|
|
|
+ mergeFwdTask->set_ref_count(1);
|
|
|
+ MergeTask * mergeRevTask = new (allocate_additional_child_of(*next)) MergeRevTask(sorter.compare, result, n1, src, n2, src+n1, 0);
|
|
|
+ mergeRevTask->set_ref_count(1);
|
|
|
+ splitTask->setTasks(i, mergeFwdTask, mergeRevTask);
|
|
|
+ }
|
|
|
+ next->decrement_ref_count();
|
|
|
+ mergeTask = splitTask;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ task * mergeFwdTask = new (allocate_additional_child_of(*next)) MergeTask(sorter.compare, result, n1, src, n2, src+n1, n1);
|
|
|
+ mergeFwdTask->set_ref_count(1);
|
|
|
+ task * mergeRevTask = new (next->allocate_child()) MergeRevTask(sorter.compare, result, n1, src, n2, src+n1, n2);
|
|
|
+ mergeRevTask->set_ref_count(1);
|
|
|
+ mergeTask = new (allocate_root()) SplitTask(mergeFwdTask, mergeRevTask);
|
|
|
+ }
|
|
|
}
|
|
|
else
|
|
|
{
|
|
@@ -862,7 +955,7 @@ class TbbParallelMergeSorter
|
|
|
void ** rows;
|
|
|
void ** temp;
|
|
|
task * next;
|
|
|
- size32_t n;
|
|
|
+ size_t n;
|
|
|
unsigned depth;
|
|
|
};
|
|
|
|
|
@@ -870,7 +963,7 @@ class TbbParallelMergeSorter
|
|
|
class SubSortTask : public tbb::task
|
|
|
{
|
|
|
public:
|
|
|
- SubSortTask(TbbParallelMergeSorter & _sorter, void ** _rows, size32_t _n, void ** _temp, unsigned _depth)
|
|
|
+ SubSortTask(TbbParallelMergeSorter & _sorter, void ** _rows, size_t _n, void ** _temp, unsigned _depth)
|
|
|
: sorter(_sorter), rows(_rows), n(_n), temp(_temp), depth(_depth)
|
|
|
{
|
|
|
}
|
|
@@ -884,7 +977,7 @@ class TbbParallelMergeSorter
|
|
|
TbbParallelMergeSorter & sorter;
|
|
|
void ** rows;
|
|
|
void ** temp;
|
|
|
- size32_t n;
|
|
|
+ size_t n;
|
|
|
unsigned depth;
|
|
|
};
|
|
|
|
|
@@ -892,17 +985,39 @@ class TbbParallelMergeSorter
|
|
|
class MergeTask : public tbb::task
|
|
|
{
|
|
|
public:
|
|
|
- MergeTask(const ICompare & _compare, void * * _result, size_t _n1, void * * _src1, size_t _n2, void * * _src2, size32_t _n)
|
|
|
+ MergeTask(const ICompare & _compare, void * * _result, size_t _n1, void * * _src1, size_t _n2, void * * _src2, size_t _n)
|
|
|
: compare(_compare),result(_result), n1(_n1), src1(_src1), n2(_n2), src2(_src2), n(_n)
|
|
|
{
|
|
|
}
|
|
|
|
|
|
virtual task * execute()
{
    //Common case: both inputs contain rows, so perform a real merge of the first n results
    if ((n1 != 0) && (n2 != 0))
    {
        mergePartitions(compare, result, n1, src1, n2, src2, n);
        return NULL;
    }

    //After the ranges are adjusted it is possible for one input to shrink to zero size (e.g., if input is sorted)
    //in which case the first n rows of the other input are copied straight to the output.
    if (n1 == 0)
    {
        assertex(n <= n2);
        clonePartition(result, n, src2);
        return NULL;
    }

    assertex(n <= n1);
    clonePartition(result, n, src1);
    return NULL;
}
|
|
|
|
|
|
+ void adjustRange(size_t deltaLeft, size_t numLeft, size_t deltaRight, size_t numRight, size_t num)
|
|
|
+ {
|
|
|
+ src1 += deltaLeft;
|
|
|
+ n1 = numLeft;
|
|
|
+ src2 += deltaRight;
|
|
|
+ n2 = numRight;
|
|
|
+ result += (deltaLeft + deltaRight);
|
|
|
+ n = num;
|
|
|
+ }
|
|
|
+
|
|
|
protected:
|
|
|
const ICompare & compare;
|
|
|
void * * result;
|
|
@@ -923,9 +1038,227 @@ class TbbParallelMergeSorter
|
|
|
|
|
|
//Produce the last n rows of the merged output, working backwards.
//If either input is empty the last n rows of the other input are copied instead of merged.
virtual task * execute()
{
    if (n1 == 0)
    {
        assertex(n <= n2);
        //This is a reverse merge, so copy n from the end of the input
        //NB: delta must be size_t - an unsigned would truncate the offset for very large partitions
        size_t delta = n2 - n;
        clonePartition(result + delta, n, src2 + delta);
    }
    else if (n2 == 0)
    {
        assertex(n <= n1);
        size_t delta = n1 - n;
        clonePartition(result + delta, n, src1 + delta);
    }
    else
        mergePartitionsRev(compare, result, n2, src2, n1, src1, n);
    return NULL;
}
|
|
|
+ };
|
|
|
+
|
|
|
+ class PartitionSplitTask : public tbb::task
|
|
|
+ {
|
|
|
+ public:
|
|
|
+ PartitionSplitTask(size_t _n1, void * * _src1, size_t _n2, void * * _src2, unsigned _numPartitions, const ICompare & _compare)
|
|
|
+ : numPartitions(_numPartitions), n1(_n1), n2(_n2), src1(_src1), src2(_src2), compare(_compare)
|
|
|
+ {
|
|
|
+ //These could be local variables in calculatePartitions(), but placed here to simplify cleanup. (Should consider using alloca)
|
|
|
+ posLeft = new size_t[numPartitions+1];
|
|
|
+ posRight = new size_t[numPartitions+1];
|
|
|
+ tasks = new MergeTask *[numPartitions*2];
|
|
|
+ for (unsigned i=0; i < numPartitions*2; i++)
|
|
|
+ tasks[i] = NULL;
|
|
|
+ }
|
|
|
+ ~PartitionSplitTask()
|
|
|
+ {
|
|
|
+ delete [] posLeft;
|
|
|
+ delete [] posRight;
|
|
|
+ delete [] tasks;
|
|
|
+ }
|
|
|
+
|
|
|
+ void calculatePartitions()
|
|
|
+ {
|
|
|
+#ifdef PARANOID
|
|
|
+ {
|
|
|
+ for (unsigned ix=1; ix<n1; ix++)
|
|
|
+ if (compare.docompare(src1[ix-1], src1[ix]) > 0)
|
|
|
+ printf("Failure left@%u\n", ix);
|
|
|
+ }
|
|
|
+ if (false)
|
|
|
+ {
|
|
|
+ for (unsigned ix=1; ix<n2; ix++)
|
|
|
+ if (compare.docompare(src2[ix-1], src2[ix]) > 0)
|
|
|
+ printf("Failure right@%u\n", ix);
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ //If dividing into P parts, select S*P-1 even points from each side.
|
|
|
+ unsigned numSamples = numPartitionSamples*numPartitions-1;
|
|
|
+ QuantilePositionIterator iterLeft(n1, numSamples+1, false);
|
|
|
+ QuantilePositionIterator iterRight(n2, numSamples+1, false);
|
|
|
+ iterLeft.first();
|
|
|
+ iterRight.first();
|
|
|
+
|
|
|
+ size_t prevLeft = 0;
|
|
|
+ size_t prevRight =0;
|
|
|
+ posLeft[0] = 0;
|
|
|
+ posRight[0] = 0;
|
|
|
+
|
|
|
+ //From the merged list, for sample i [zero based], we can guarantee that there are at least (i+1)*(n1+n2)/numSamples*2
|
|
|
+ //rows before sample i, and at most (i+2)*(n1+n2)/numSamples*2 samples after it.
|
|
|
+ //=> pick samples [0, 2*numSamples, 4*numSamples ...]
|
|
|
+ //NOTE: Include elements at position 0 to ensure sorted inputs are partitioned evenly
|
|
|
+ for (unsigned part = 1; part < numPartitions; part++)
|
|
|
+ {
|
|
|
+ unsigned numToSkip = numPartitionSamples*2;
|
|
|
+ if (part == 1)
|
|
|
+ numToSkip++;
|
|
|
+ for (unsigned skip=numToSkip; skip-- != 0; )
|
|
|
+ {
|
|
|
+ size_t leftPos = iterLeft.get();
|
|
|
+ size_t rightPos = iterRight.get();
|
|
|
+ if (leftPos == n1)
|
|
|
+ {
|
|
|
+ if (skip == 0)
|
|
|
+ {
|
|
|
+ posLeft[part] = leftPos;
|
|
|
+ posRight[part] = rightPos;
|
|
|
+ }
|
|
|
+ iterRight.next();
|
|
|
+ }
|
|
|
+ else if (rightPos == n2)
|
|
|
+ {
|
|
|
+ if (skip == 0)
|
|
|
+ {
|
|
|
+ posLeft[part] = leftPos;
|
|
|
+ posRight[part] = rightPos;
|
|
|
+ }
|
|
|
+ iterLeft.next();
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ int c = compare.docompare(src1[leftPos], src2[rightPos]);
|
|
|
+ if (skip == 0)
|
|
|
+ {
|
|
|
+ if (c <= 0)
|
|
|
+ {
|
|
|
+ //value in left is smallest. Find the position of the value <= the left value
|
|
|
+ posLeft[part] = leftPos;
|
|
|
+ posRight[part] = findFirstGE(src1[leftPos], prevRight, rightPos, src2);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ posLeft[part] = findFirstGT(src2[rightPos], prevLeft, leftPos, src1);
|
|
|
+ posRight[part] = rightPos;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (c <= 0)
|
|
|
+ {
|
|
|
+ iterLeft.next();
|
|
|
+ prevLeft = leftPos;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ iterRight.next();
|
|
|
+ prevRight = rightPos;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ posLeft[numPartitions] = n1;
|
|
|
+ posRight[numPartitions] = n2;
|
|
|
+#ifdef TRACE_PARTITION
|
|
|
+ printf("%d,%d -> {", (unsigned)n1, (unsigned)n2);
|
|
|
+#endif
|
|
|
+ for (unsigned i= 0; i < numPartitions; i++)
|
|
|
+ {
|
|
|
+ size_t start = posLeft[i] + posRight[i];
|
|
|
+ size_t end = posLeft[i+1] + posRight[i+1];
|
|
|
+ size_t num = end - start;
|
|
|
+ size_t numFwd = num/2;
|
|
|
+#ifdef TRACE_PARTITION
|
|
|
+ printf("([%d..%d],[%d..%d] %d,%d = %d)\n",
|
|
|
+ (unsigned)posLeft[i], (unsigned)posLeft[i+1], (unsigned)posRight[i], (unsigned)posRight[i+1],
|
|
|
+ (unsigned)start, (unsigned)end, (unsigned)num);
|
|
|
+#endif
|
|
|
+
|
|
|
+ MergeTask & mergeFwdTask = *tasks[i*2];
|
|
|
+ MergeTask & mergeRevTask = *tasks[i*2+1];
|
|
|
+ mergeFwdTask.adjustRange(posLeft[i], posLeft[i+1]-posLeft[i],
|
|
|
+ posRight[i], posRight[i+1]-posRight[i],
|
|
|
+ numFwd);
|
|
|
+ mergeRevTask.adjustRange(posLeft[i], posLeft[i+1]-posLeft[i],
|
|
|
+ posRight[i], posRight[i+1]-posRight[i],
|
|
|
+ num-numFwd);
|
|
|
+ }
|
|
|
+#ifdef TRACE_PARTITION
|
|
|
+ printf("}\n");
|
|
|
+#endif
|
|
|
+ }
|
|
|
+
|
|
|
+ virtual task * execute()
|
|
|
+ {
|
|
|
+ calculatePartitions();
|
|
|
+ for (unsigned i=0; i < numPartitions*2; i++)
|
|
|
+ {
|
|
|
+ if (tasks[i]->decrement_ref_count() == 0)
|
|
|
+ spawn(*tasks[i]);
|
|
|
+ }
|
|
|
return NULL;
|
|
|
}
|
|
|
+
|
|
|
+ void setTasks(unsigned i, MergeTask * fwd, MergeTask * rev)
|
|
|
+ {
|
|
|
+ tasks[i*2] = fwd;
|
|
|
+ tasks[i*2+1] = rev;
|
|
|
+ }
|
|
|
+
|
|
|
+ protected:
|
|
|
+ size_t findFirstGE(void * seek, size_t low, size_t high, void * * rows)
|
|
|
+ {
|
|
|
+ if (low == high)
|
|
|
+ return low;
|
|
|
+ while (high - low > 1)
|
|
|
+ {
|
|
|
+ size_t mid = (low + high) / 2;
|
|
|
+ if (compare.docompare(rows[mid], seek) < 0)
|
|
|
+ low = mid;
|
|
|
+ else
|
|
|
+ high = mid;
|
|
|
+ }
|
|
|
+ if (compare.docompare(rows[low], seek) < 0)
|
|
|
+ return low+1;
|
|
|
+ return low;
|
|
|
+ }
|
|
|
+
|
|
|
+ size_t findFirstGT(void * seek, size_t low, size_t high, void * * rows)
|
|
|
+ {
|
|
|
+ if (low == high)
|
|
|
+ return low;
|
|
|
+ while (high - low > 1)
|
|
|
+ {
|
|
|
+ size_t mid = (low + high) / 2;
|
|
|
+ if (compare.docompare(rows[mid], seek) <= 0)
|
|
|
+ low = mid;
|
|
|
+ else
|
|
|
+ high = mid;
|
|
|
+ }
|
|
|
+ if (compare.docompare(rows[low], seek) <= 0)
|
|
|
+ return low+1;
|
|
|
+ return low;
|
|
|
+ }
|
|
|
+
|
|
|
+ protected:
|
|
|
+ const ICompare & compare;
|
|
|
+ unsigned numPartitions;
|
|
|
+ size_t n1;
|
|
|
+ size_t n2;
|
|
|
+ void * * src1;
|
|
|
+ void * * src2;
|
|
|
+ size_t * posLeft;
|
|
|
+ size_t * posRight;
|
|
|
+ MergeTask * * tasks;
|
|
|
};
|
|
|
|
|
|
public:
|
|
@@ -943,11 +1276,15 @@ public:
|
|
|
|
|
|
//Merge in parallel once it is likely to be beneficial
|
|
|
parallelMergeDepth = ln2NumCpus+ extraParallelMergeDepth;
|
|
|
+
|
|
|
//Aim to execute in parallel until the width is 8*the maximum number of parallel task
|
|
|
singleThreadDepth = ln2NumCpus + extraBisectDepth;
|
|
|
+ partitionCores = numCpus / 2;
|
|
|
}
|
|
|
|
|
|
- void sortRoot(void ** rows, size32_t n, void ** temp)
|
|
|
+ unsigned numPartitionCores() const { return partitionCores; }
|
|
|
+
|
|
|
+ void sortRoot(void ** rows, size_t n, void ** temp)
|
|
|
{
|
|
|
task * end = new (task::allocate_root()) tbb::empty_task();
|
|
|
end->set_ref_count(1+1);
|
|
@@ -961,11 +1298,12 @@ public:
|
|
|
const ICompare & compare;
|
|
|
unsigned singleThreadDepth;
|
|
|
unsigned parallelMergeDepth;
|
|
|
+ unsigned partitionCores;
|
|
|
void * * baseRows;
|
|
|
};
|
|
|
|
|
|
//-------------------------------------------------------------------------------------------------------------------
|
|
|
-void parmsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare, void ** temp, unsigned ncpus)
|
|
|
+void parmsortvecstableinplace(void ** rows, size_t n, const ICompare & compare, void ** temp, unsigned ncpus)
|
|
|
{
|
|
|
if ((n <= singleThreadedMSortThreshold) || ncpus == 1)
|
|
|
{
|
|
@@ -977,9 +1315,9 @@ void parmsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare
|
|
|
sorter.sortRoot(rows, n, temp);
|
|
|
}
|
|
|
#else
|
|
|
-void parmsortvecstableinplace(void ** rows, size32_t n, const ICompare & compare, void ** temp, unsigned ncpus)
|
|
|
+void parmsortvecstableinplace(void ** rows, size_t n, const ICompare & compare, void ** temp, unsigned ncpus)
|
|
|
{
|
|
|
- parqsortvecstableinplace(rows, n, compare, temp, ncpus);
|
|
|
+ parqsortvecstableinplace(rows, (size32_t)n, compare, temp, ncpus);
|
|
|
}
|
|
|
#endif
|
|
|
|