123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495 |
- /*
- * Calculates univariate statistics from the non-null cells
- *
- * Copyright (C) 2004-2007 by the GRASS Development Team
- * Author(s): Hamish Bowman, University of Otago, New Zealand
- * Martin Landa and Soeren Gebbert
- *
- * This program is free software under the GNU General Public
- * License (>=v2). Read the file COPYING that comes with GRASS
- * for details.
- *
- */
- #include "globals.h"
- /* *************************************************************** */
- /* **** univar_stat constructor ********************************** */
- /* *************************************************************** */
- univar_stat *create_univar_stat_struct(int map_type, int n_perc)
- {
- univar_stat *stats;
- int i;
- int n_zones = zone_info.n_zones;
- if (n_zones == 0)
- n_zones = 1;
- stats = (univar_stat *) G_calloc(n_zones, sizeof(univar_stat));
- for (i = 0; i < n_zones; i++) {
- stats[i].sum = 0.0;
- stats[i].sumsq = 0.0;
- stats[i].min = 0.0 / 0.0; /* set to nan as default */
- stats[i].max = 0.0 / 0.0; /* set to nan as default */
- stats[i].n_perc = n_perc;
- if (n_perc > 0)
- stats[i].perc = (double *)G_malloc(n_perc * sizeof(double));
- else
- stats[i].perc = NULL;
- stats[i].sum_abs = 0.0;
- stats[i].n = 0;
- stats[i].size = 0;
- stats[i].dcell_array = NULL;
- stats[i].fcell_array = NULL;
- stats[i].cell_array = NULL;
- stats[i].map_type = map_type;
- stats[i].n_alloc = 0;
- stats[i].first = TRUE;
- /* allocate memory for extended computation */
- /* changed to on-demand block allocation */
- /* if (param.extended->answer) {
- if (map_type == DCELL_TYPE) {
- stats[i].dcell_array = NULL;
- }
- else if (map_type == FCELL_TYPE) {
- stats[i].fcell_array =NULL;
- }
- else if (map_type == CELL_TYPE) {
- stats[i].cell_array = NULL;
- }
- }
- */
- }
- return stats;
- }
- /* *************************************************************** */
- /* **** univar_stat destructor *********************************** */
- /* *************************************************************** */
- void free_univar_stat_struct(univar_stat * stats)
- {
- int i;
- int n_zones = zone_info.n_zones;
- if (n_zones == 0)
- n_zones = 1;
- for (i = 0; i < n_zones; i++){
- if (stats[i].perc)
- G_free(stats[i].perc);
- if (stats[i].dcell_array)
- G_free(stats[i].dcell_array);
- if (stats[i].fcell_array)
- G_free(stats[i].fcell_array);
- if (stats[i].cell_array)
- G_free(stats[i].cell_array);
- }
- G_free(stats);
- return;
- }
- /* *************************************************************** */
- /* **** compute and print univar statistics to stdout ************ */
- /* *************************************************************** */
- int print_stats(univar_stat * stats)
- {
- int z, n_zones = zone_info.n_zones;
- if (n_zones == 0)
- n_zones = 1;
- for (z = 0; z < n_zones; z++) {
- char sum_str[100];
- double mean, variance, stdev, var_coef;
- /* for extendet stats */
- double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
- double median = 0.0;
- unsigned int i;
- int qpos_25, qpos_75, *qpos_perc;
- /* stats collected for this zone? */
- if (stats[z].n == 0)
- continue;
- /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
- mean = stats[z].sum / stats[z].n;
- variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
- if (variance < GRASS_EPSILON)
- variance = 0.0;
- stdev = sqrt(variance);
- var_coef = (stdev / mean) * 100.; /* perhaps stdev/fabs(mean) ? */
- sprintf(sum_str, "%.10f", stats[z].sum);
- G_trim_decimal(sum_str);
- if (zone_info.n_zones) {
- int z_cat = z + zone_info.min;
-
- fprintf(stdout, "\nzone %d %s\n\n", z_cat, Rast_get_c_cat(&z_cat, &(zone_info.cats)));
- }
- if (!param.shell_style->answer) {
- fprintf(stdout, "total null and non-null cells: %lu\n", stats[z].size);
- fprintf(stdout, "total null cells: %lu\n\n", stats[z].size - stats[z].n);
- fprintf(stdout, "Of the non-null cells:\n----------------------\n");
- }
- if (param.shell_style->answer) {
- fprintf(stdout, "n=%lu\n", stats[z].n);
- fprintf(stdout, "null_cells=%lu\n", stats[z].size - stats[z].n);
- fprintf(stdout, "cells=%lu\n", stats->size);
- fprintf(stdout, "min=%.15g\n", stats[z].min);
- fprintf(stdout, "max=%.15g\n", stats[z].max);
- fprintf(stdout, "range=%.15g\n", stats[z].max - stats[z].min);
- fprintf(stdout, "mean=%.15g\n", mean);
- fprintf(stdout, "mean_of_abs=%.15g\n", stats[z].sum_abs / stats[z].n);
- fprintf(stdout, "stddev=%.15g\n", stdev);
- fprintf(stdout, "variance=%.15g\n", variance);
- fprintf(stdout, "coeff_var=%.15g\n", var_coef);
- fprintf(stdout, "sum=%s\n", sum_str);
- }
- else {
- fprintf(stdout, "n: %lu\n", stats[z].n);
- fprintf(stdout, "minimum: %g\n", stats[z].min);
- fprintf(stdout, "maximum: %g\n", stats[z].max);
- fprintf(stdout, "range: %g\n", stats[z].max - stats[z].min);
- fprintf(stdout, "mean: %g\n", mean);
- fprintf(stdout, "mean of absolute values: %g\n",
- stats[z].sum_abs / stats[z].n);
- fprintf(stdout, "standard deviation: %g\n", stdev);
- fprintf(stdout, "variance: %g\n", variance);
- fprintf(stdout, "variation coefficient: %g %%\n", var_coef);
- fprintf(stdout, "sum: %s\n", sum_str);
- }
- /* TODO: mode, skewness, kurtosis */
- if (param.extended->answer) {
- qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
- quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
- for (i = 0; i < stats[z].n_perc; i++) {
- qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
- }
- qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
- qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
- switch (stats[z].map_type) {
- case CELL_TYPE:
- heapsort_int(stats[z].cell_array, stats[z].n);
- quartile_25 = (double)stats[z].cell_array[qpos_25];
- if (stats[z].n % 2) /* odd */
- median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
- else /* even */
- median =
- (double)(stats[z].cell_array[stats[z].n / 2 - 1] +
- stats[z].cell_array[stats[z].n / 2]) / 2.0;
- quartile_75 = (double)stats[z].cell_array[qpos_75];
- for (i = 0; i < stats[z].n_perc; i++) {
- quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
- }
- break;
- case FCELL_TYPE:
- heapsort_float(stats[z].fcell_array, stats[z].n);
- quartile_25 = (double)stats[z].fcell_array[qpos_25];
- if (stats[z].n % 2) /* odd */
- median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
- else /* even */
- median =
- (double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
- stats[z].fcell_array[stats[z].n / 2]) / 2.0;
- quartile_75 = (double)stats[z].fcell_array[qpos_75];
- for (i = 0; i < stats[z].n_perc; i++) {
- quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
- }
- break;
- case DCELL_TYPE:
- heapsort_double(stats[z].dcell_array, stats[z].n);
- quartile_25 = stats[z].dcell_array[qpos_25];
- if (stats[z].n % 2) /* odd */
- median = stats[z].dcell_array[(int)(stats[z].n / 2)];
- else /* even */
- median =
- (stats[z].dcell_array[stats[z].n / 2 - 1] +
- stats[z].dcell_array[stats[z].n / 2]) / 2.0;
- quartile_75 = stats[z].dcell_array[qpos_75];
- for (i = 0; i < stats[z].n_perc; i++) {
- quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
- }
- break;
- default:
- break;
- }
- if (param.shell_style->answer) {
- fprintf(stdout, "first_quartile=%g\n", quartile_25);
- fprintf(stdout, "median=%g\n", median);
- fprintf(stdout, "third_quartile=%g\n", quartile_75);
- for (i = 0; i < stats[z].n_perc; i++) {
- char buf[24];
- sprintf(buf, "%.15g", stats[z].perc[i]);
- G_strchg(buf, '.', '_');
- fprintf(stdout, "percentile_%s=%g\n", buf,
- quartile_perc[i]);
- }
- }
- else {
- fprintf(stdout, "1st quartile: %g\n", quartile_25);
- if (stats[z].n % 2)
- fprintf(stdout, "median (odd number of cells): %g\n", median);
- else
- fprintf(stdout, "median (even number of cells): %g\n",
- median);
- fprintf(stdout, "3rd quartile: %g\n", quartile_75);
- for (i = 0; i < stats[z].n_perc; i++) {
- if (stats[z].perc[i] == (int)stats[z].perc[i]) {
- /* percentile is an exact integer */
- if ((int)stats[z].perc[i] % 10 == 1 && (int)stats[z].perc[i] != 11)
- fprintf(stdout, "%dst percentile: %g\n", (int)stats[z].perc[i],
- quartile_perc[i]);
- else if ((int)stats[z].perc[i] % 10 == 2 && (int)stats[z].perc[i] != 12)
- fprintf(stdout, "%dnd percentile: %g\n", (int)stats[z].perc[i],
- quartile_perc[i]);
- else if ((int)stats[z].perc[i] % 10 == 3 && (int)stats[z].perc[i] != 13)
- fprintf(stdout, "%drd percentile: %g\n", (int)stats[z].perc[i],
- quartile_perc[i]);
- else
- fprintf(stdout, "%dth percentile: %g\n", (int)stats[z].perc[i],
- quartile_perc[i]);
- }
- else {
- /* percentile is not an exact integer */
- fprintf(stdout, "%.15g percentile: %g\n", stats[z].perc[i],
- quartile_perc[i]);
- }
- }
- }
- G_free((void *)quartile_perc);
- G_free((void *)qpos_perc);
- }
- /* G_message() prints to stderr not stdout: disabled. this \n is printed above with zone */
- /* if (!(param.shell_style->answer))
- G_message("\n"); */
- }
- return 1;
- }
- int print_stats_table(univar_stat * stats)
- {
- unsigned int i;
- int z, n_zones = zone_info.n_zones;
- if (n_zones == 0)
- n_zones = 1;
- /* print column headers */
- if (zone_info.n_zones) {
- fprintf(stdout, "zone%s", zone_info.sep);
- fprintf(stdout, "label%s", zone_info.sep);
- }
- fprintf(stdout, "non_null_cells%s", zone_info.sep);
- fprintf(stdout, "null_cells%s", zone_info.sep);
- fprintf(stdout, "min%s", zone_info.sep);
- fprintf(stdout, "max%s", zone_info.sep);
- fprintf(stdout, "range%s", zone_info.sep);
- fprintf(stdout, "mean%s", zone_info.sep);
- fprintf(stdout, "mean_of_abs%s", zone_info.sep);
- fprintf(stdout, "stddev%s", zone_info.sep);
- fprintf(stdout, "variance%s", zone_info.sep);
- fprintf(stdout, "coeff_var%s", zone_info.sep);
- fprintf(stdout, "sum%s", zone_info.sep);
- fprintf(stdout, "sum_abs");
- if (param.extended->answer) {
- fprintf(stdout, "%sfirst_quart", zone_info.sep);
- fprintf(stdout, "%smedian", zone_info.sep);
- fprintf(stdout, "%sthird_quart", zone_info.sep);
- for (i = 0; i < stats[0].n_perc; i++) {
- if (stats[0].perc[i] == (int)stats[0].perc[i]) {
- /* percentile is an exact integer */
- fprintf(stdout, "%sperc_%d", zone_info.sep, (int)stats[0].perc[i]);
- }
- else {
- /* percentile is not an exact integer */
- char buf[24];
- sprintf(buf, "%.15g", stats[0].perc[i]);
- G_strchg(buf, '.', '_');
- fprintf(stdout, "%sperc_%s", zone_info.sep, buf);
- }
- }
- }
- fprintf(stdout, "\n");
- /* print stats */
- for (z = 0; z < n_zones; z++) {
- char sum_str[100];
- double mean, variance, stdev, var_coef;
- /* for extendet stats */
- double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
- double median = 0.0;
- int qpos_25, qpos_75, *qpos_perc;
- /* stats collected for this zone? */
- if (stats[z].n == 0)
- continue;
- i = 0;
- /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
- mean = stats[z].sum / stats[z].n;
- variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
- if (variance < GRASS_EPSILON)
- variance = 0.0;
- stdev = sqrt(variance);
- var_coef = (stdev / mean) * 100.; /* perhaps stdev/fabs(mean) ? */
- if (zone_info.n_zones) {
- int z_cat = z + zone_info.min;
- /* zone number */
- fprintf(stdout, "%d%s", z + zone_info.min, zone_info.sep);
- /* zone label */
- fprintf(stdout,"%s%s", Rast_get_c_cat(&z_cat, &(zone_info.cats)), zone_info.sep);
- }
- /* non-null cells cells */
- fprintf(stdout, "%lu%s", stats[z].n, zone_info.sep);
- /* null cells */
- fprintf(stdout, "%lu%s", stats[z].size - stats[z].n, zone_info.sep);
- /* min */
- fprintf(stdout, "%.15g%s", stats[z].min, zone_info.sep);
- /* max */
- fprintf(stdout, "%.15g%s", stats[z].max, zone_info.sep);
- /* range */
- fprintf(stdout, "%.15g%s", stats[z].max - stats[z].min, zone_info.sep);
- /* mean */
- fprintf(stdout, "%.15g%s", mean, zone_info.sep);
- /* mean of abs */
- fprintf(stdout, "%.15g%s", stats[z].sum_abs / stats[z].n, zone_info.sep);
- /* stddev */
- fprintf(stdout, "%.15g%s", stdev, zone_info.sep);
- /* variance */
- fprintf(stdout, "%.15g%s", variance, zone_info.sep);
- /* coefficient of variance */
- fprintf(stdout, "%.15g%s", var_coef, zone_info.sep);
- /* sum */
- sprintf(sum_str, "%.10f", stats[z].sum);
- G_trim_decimal(sum_str);
- fprintf(stdout, "%s%s", sum_str, zone_info.sep);
- /* absolute sum */
- sprintf(sum_str, "%.10f", stats[z].sum_abs);
- G_trim_decimal(sum_str);
- fprintf(stdout, "%s", sum_str);
- /* TODO: mode, skewness, kurtosis */
- if (param.extended->answer) {
- qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
- quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
- for (i = 0; i < stats[z].n_perc; i++) {
- qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
- }
- qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
- qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
- switch (stats[z].map_type) {
- case CELL_TYPE:
- heapsort_int(stats[z].cell_array, stats[z].n);
- quartile_25 = (double)stats[z].cell_array[qpos_25];
- if (stats[z].n % 2) /* odd */
- median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
- else /* even */
- median =
- (double)(stats[z].cell_array[stats[z].n / 2 - 1] +
- stats[z].cell_array[stats[z].n / 2]) / 2.0;
- quartile_75 = (double)stats[z].cell_array[qpos_75];
- for (i = 0; i < stats[z].n_perc; i++) {
- quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
- }
- break;
- case FCELL_TYPE:
- heapsort_float(stats[z].fcell_array, stats[z].n);
- quartile_25 = (double)stats[z].fcell_array[qpos_25];
- if (stats[z].n % 2) /* odd */
- median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
- else /* even */
- median =
- (double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
- stats[z].fcell_array[stats[z].n / 2]) / 2.0;
- quartile_75 = (double)stats[z].fcell_array[qpos_75];
- for (i = 0; i < stats[z].n_perc; i++) {
- quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
- }
- break;
- case DCELL_TYPE:
- heapsort_double(stats[z].dcell_array, stats[z].n);
- quartile_25 = stats[z].dcell_array[qpos_25];
- if (stats[z].n % 2) /* odd */
- median = stats[z].dcell_array[(int)(stats[z].n / 2)];
- else /* even */
- median =
- (stats[z].dcell_array[stats[z].n / 2 - 1] +
- stats[z].dcell_array[stats[z].n / 2]) / 2.0;
- quartile_75 = stats[z].dcell_array[qpos_75];
- for (i = 0; i < stats[z].n_perc; i++) {
- quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
- }
- break;
- default:
- break;
- }
- /* first quartile */
- fprintf(stdout, "%s%g", zone_info.sep, quartile_25);
- /* median */
- fprintf(stdout, "%s%g", zone_info.sep, median);
- /* third quartile */
- fprintf(stdout, "%s%g", zone_info.sep, quartile_75);
- /* percentiles */
- for (i = 0; i < stats[z].n_perc; i++) {
- fprintf(stdout, "%s%g", zone_info.sep ,
- quartile_perc[i]);
- }
- G_free((void *)quartile_perc);
- G_free((void *)qpos_perc);
- }
- fprintf(stdout, "\n");
- /* zone z finished */
- }
- return 1;
- }
|