|
@@ -18,7 +18,6 @@
|
|
#include <grass/raster.h>
|
|
#include <grass/raster.h>
|
|
#include <grass/glocale.h>
|
|
#include <grass/glocale.h>
|
|
|
|
|
|
-/* TODO: replace long with either size_t or a guaranteed 64 bit integer */
|
|
|
|
struct bin
|
|
struct bin
|
|
{
|
|
{
|
|
size_t origin;
|
|
size_t origin;
|
|
@@ -34,11 +33,11 @@ static DCELL *quants;
|
|
static int num_slots;
|
|
static int num_slots;
|
|
static size_t *slots;
|
|
static size_t *slots;
|
|
static DCELL slot_size;
|
|
static DCELL slot_size;
|
|
-/* total should be a 64bit integer */
|
|
|
|
-static unsigned long total;
|
|
|
|
|
|
+static grass_int64 total;
|
|
static size_t num_values;
|
|
static size_t num_values;
|
|
static unsigned short *slot_bins;
|
|
static unsigned short *slot_bins;
|
|
-static int num_bins;
|
|
|
|
|
|
+static int num_bins_alloc;
|
|
|
|
+static int num_bins_used;
|
|
static struct bin *bins;
|
|
static struct bin *bins;
|
|
static DCELL *values;
|
|
static DCELL *values;
|
|
|
|
|
|
@@ -53,12 +52,38 @@ static inline int get_slot(DCELL c)
|
|
return i;
|
|
return i;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/* get zero-based rank for quantile */
|
|
|
|
+/* generic formula for one-based rank
|
|
|
|
+ * rank = quant * (N + 1 - 2C) + C
|
|
|
|
+ * with quant = quantile, N = number of values, C = constant
|
|
|
|
+ * common values for C:
|
|
|
|
+ * C = 0
|
|
|
|
+ * rank = quant * (N + 1)
|
|
|
|
+ * recommended by NIST (National Institute of Standards and Technology)
|
|
|
|
+ * https://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
|
|
|
|
+ * C = 0.5
|
|
|
|
+ * rank = quant * N + 0.5
|
|
|
|
+ * Matlab
|
|
|
|
+ * C = 1
|
|
|
|
+ * rank = quant * (N - 1) + 1
|
|
|
|
+ * numpy, R, MS Excel, ...
|
|
|
|
+ * Noted as an alternative by NIST */
|
|
static inline double get_quantile(int n)
|
|
static inline double get_quantile(int n)
|
|
{
|
|
{
|
|
- if (n >= num_quants)
|
|
|
|
|
|
+ double rnk;
|
|
|
|
+
|
|
|
|
+ if (n >= num_quants) {
|
|
|
|
+ /* stop condition for initialize_bins() */
|
|
return (double)total + total;
|
|
return (double)total + total;
|
|
|
|
+ }
|
|
|
|
|
|
- return (double)total * quants[n];
|
|
|
|
|
|
+ rnk = quants[n] * (total - 1);
|
|
|
|
+ if (rnk < 0)
|
|
|
|
+ rnk = 0;
|
|
|
|
+ if (rnk > total - 1)
|
|
|
|
+ rnk = total - 1;
|
|
|
|
+
|
|
|
|
+ return rnk;
|
|
}
|
|
}
|
|
|
|
|
|
static void get_slot_counts(int infile)
|
|
static void get_slot_counts(int infile)
|
|
@@ -99,18 +124,26 @@ static void initialize_bins(void)
|
|
int bin = 0;
|
|
int bin = 0;
|
|
size_t accum = 0;
|
|
size_t accum = 0;
|
|
int quant = 0;
|
|
int quant = 0;
|
|
|
|
+ int use_next_slot = 0;
|
|
|
|
|
|
G_message(_("Computing bins"));
|
|
G_message(_("Computing bins"));
|
|
|
|
|
|
num_values = 0;
|
|
num_values = 0;
|
|
next = get_quantile(quant);
|
|
next = get_quantile(quant);
|
|
|
|
|
|
|
|
+ /* for a given quantile, two bins might be needed
|
|
|
|
+ * if the index for this quantile is
|
|
|
|
+ * > accumulated count of current bin
|
|
|
|
+ * and
|
|
|
|
+ * < accumulated count of next bin */
|
|
|
|
+
|
|
for (slot = 0; slot < num_slots; slot++) {
|
|
for (slot = 0; slot < num_slots; slot++) {
|
|
size_t count = slots[slot];
|
|
size_t count = slots[slot];
|
|
size_t accum2 = accum + count;
|
|
size_t accum2 = accum + count;
|
|
|
|
|
|
- if (accum2 > next ||
|
|
|
|
- (slot == num_slots - 1 && accum2 == next)) {
|
|
|
|
|
|
+ if (count > 0 &&
|
|
|
|
+ (accum2 > next || use_next_slot) &&
|
|
|
|
+ bin < num_bins_alloc) {
|
|
struct bin *b = &bins[bin];
|
|
struct bin *b = &bins[bin];
|
|
|
|
|
|
slot_bins[slot] = ++bin;
|
|
slot_bins[slot] = ++bin;
|
|
@@ -121,18 +154,24 @@ static void initialize_bins(void)
|
|
b->min = min + slot_size * slot;
|
|
b->min = min + slot_size * slot;
|
|
b->max = min + slot_size * (slot + 1);
|
|
b->max = min + slot_size * (slot + 1);
|
|
|
|
|
|
- while (accum2 > next)
|
|
|
|
- next = get_quantile(++quant);
|
|
|
|
|
|
+ use_next_slot = 0;
|
|
|
|
+
|
|
|
|
+ if (accum2 - next < 1) {
|
|
|
|
+ use_next_slot = 1;
|
|
|
|
+ }
|
|
|
|
+ else {
|
|
|
|
+ while (accum2 > next)
|
|
|
|
+ next = get_quantile(++quant);
|
|
|
|
+ }
|
|
|
|
|
|
num_values += count;
|
|
num_values += count;
|
|
}
|
|
}
|
|
-
|
|
|
|
accum = accum2;
|
|
accum = accum2;
|
|
}
|
|
}
|
|
|
|
|
|
- num_bins = bin;
|
|
|
|
|
|
+ num_bins_used = bin;
|
|
|
|
|
|
- G_debug(1, "Number of bins: %d", num_bins);
|
|
|
|
|
|
+ G_debug(1, "Number of used bins: %d", num_bins_used);
|
|
G_debug(1, "Number of values: %lu", num_values);
|
|
G_debug(1, "Number of values: %lu", num_values);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -188,7 +227,7 @@ static void sort_bins(void)
|
|
|
|
|
|
G_message(_("Sorting bins"));
|
|
G_message(_("Sorting bins"));
|
|
|
|
|
|
- for (bin = 0; bin < num_bins; bin++) {
|
|
|
|
|
|
+ for (bin = 0; bin < num_bins_used; bin++) {
|
|
struct bin *b = &bins[bin];
|
|
struct bin *b = &bins[bin];
|
|
|
|
|
|
qsort(&values[b->base], b->count, sizeof(DCELL), compare_dcell);
|
|
qsort(&values[b->base], b->count, sizeof(DCELL), compare_dcell);
|
|
@@ -209,22 +248,17 @@ static void compute_quantiles(int recode)
|
|
double k, v;
|
|
double k, v;
|
|
size_t i0, i1;
|
|
size_t i0, i1;
|
|
|
|
|
|
- for (; bin < num_bins; bin++) {
|
|
|
|
|
|
+ for (; bin < num_bins_used; bin++) {
|
|
b = &bins[bin];
|
|
b = &bins[bin];
|
|
if (b->origin + b->count >= next)
|
|
if (b->origin + b->count >= next)
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
- if (bin < num_bins) {
|
|
|
|
|
|
+ if (bin < num_bins_used) {
|
|
k = next - b->origin;
|
|
k = next - b->origin;
|
|
i0 = (size_t)floor(k);
|
|
i0 = (size_t)floor(k);
|
|
i1 = (size_t)ceil(k);
|
|
i1 = (size_t)ceil(k);
|
|
|
|
|
|
- if (i0 > b->count - 1)
|
|
|
|
- i0 = b->count - 1;
|
|
|
|
- if (i1 > b->count - 1)
|
|
|
|
- i1 = b->count - 1;
|
|
|
|
-
|
|
|
|
v = (i0 == i1)
|
|
v = (i0 == i1)
|
|
? values[b->base + i0]
|
|
? values[b->base + i0]
|
|
: values[b->base + i0] * (i1 - k) +
|
|
: values[b->base + i0] * (i1 - k) +
|
|
@@ -258,6 +292,7 @@ int main(int argc, char *argv[])
|
|
int recode;
|
|
int recode;
|
|
int infile;
|
|
int infile;
|
|
struct FPRange range;
|
|
struct FPRange range;
|
|
|
|
+ int num_slots_max;
|
|
|
|
|
|
G_gisinit(argv[0]);
|
|
G_gisinit(argv[0]);
|
|
|
|
|
|
@@ -301,7 +336,7 @@ int main(int argc, char *argv[])
|
|
flag.r = G_define_flag();
|
|
flag.r = G_define_flag();
|
|
flag.r->key = 'r';
|
|
flag.r->key = 'r';
|
|
flag.r->description = _("Generate recode rules based on quantile-defined intervals");
|
|
flag.r->description = _("Generate recode rules based on quantile-defined intervals");
|
|
-
|
|
|
|
|
|
+
|
|
if (G_parser(argc, argv))
|
|
if (G_parser(argc, argv))
|
|
exit(EXIT_FAILURE);
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
|
@@ -341,17 +376,29 @@ int main(int argc, char *argv[])
|
|
Rast_read_fp_range(opt.input->answer, "", &range);
|
|
Rast_read_fp_range(opt.input->answer, "", &range);
|
|
Rast_get_fp_range_min_max(&range, &min, &max);
|
|
Rast_get_fp_range_min_max(&range, &min, &max);
|
|
|
|
|
|
|
|
+ rows = Rast_window_rows();
|
|
|
|
+ cols = Rast_window_cols();
|
|
|
|
+
|
|
|
|
+ /* minimum 1000 values per slot to reduce memory consumption */
|
|
|
|
+ num_slots_max = ((size_t)rows * cols) / 1000;
|
|
|
|
+ if (num_slots_max < 1)
|
|
|
|
+ num_slots_max = 1;
|
|
|
|
+ if (num_slots > num_slots_max) {
|
|
|
|
+ G_message(_("Reducing number of bins from %d to %d"),
|
|
|
|
+ num_slots, num_slots_max);
|
|
|
|
+ num_slots = num_slots_max;
|
|
|
|
+ }
|
|
|
|
+
|
|
slots = G_calloc(num_slots, sizeof(size_t));
|
|
slots = G_calloc(num_slots, sizeof(size_t));
|
|
slot_bins = G_calloc(num_slots, sizeof(unsigned short));
|
|
slot_bins = G_calloc(num_slots, sizeof(unsigned short));
|
|
|
|
|
|
slot_size = (max - min) / num_slots;
|
|
slot_size = (max - min) / num_slots;
|
|
|
|
|
|
- rows = Rast_window_rows();
|
|
|
|
- cols = Rast_window_cols();
|
|
|
|
-
|
|
|
|
get_slot_counts(infile);
|
|
get_slot_counts(infile);
|
|
|
|
|
|
- bins = G_calloc(num_quants, sizeof(struct bin));
|
|
|
|
|
|
+ /* sometimes two bins are needed to calculate a quantile */
|
|
|
|
+ num_bins_alloc = num_quants * 2;
|
|
|
|
+ bins = G_calloc(num_bins_alloc, sizeof(struct bin));
|
|
initialize_bins();
|
|
initialize_bins();
|
|
G_free(slots);
|
|
G_free(slots);
|
|
|
|
|