stats.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. /*
  2. * Calculates univariate statistics from the non-null cells
  3. *
  4. * Copyright (C) 2004-2007 by the GRASS Development Team
  5. * Author(s): Hamish Bowman, University of Otago, New Zealand
  6. * Martin Landa and Soeren Gebbert
  7. *
  8. * This program is free software under the GNU General Public
  9. * License (>=v2). Read the file COPYING that comes with GRASS
  10. * for details.
  11. *
  12. */
  13. #include "globals.h"
  14. /* *************************************************************** */
  15. /* **** univar_stat constructor ********************************** */
  16. /* *************************************************************** */
  17. univar_stat *create_univar_stat_struct(int map_type, int n_perc)
  18. {
  19. univar_stat *stats;
  20. int i;
  21. int n_zones = zone_info.n_zones;
  22. if (n_zones == 0)
  23. n_zones = 1;
  24. stats = (univar_stat *) G_calloc(n_zones, sizeof(univar_stat));
  25. for (i = 0; i < n_zones; i++) {
  26. stats[i].sum = 0.0;
  27. stats[i].sumsq = 0.0;
  28. stats[i].min = 0.0 / 0.0; /* set to nan as default */
  29. stats[i].max = 0.0 / 0.0; /* set to nan as default */
  30. stats[i].n_perc = n_perc;
  31. if (n_perc > 0)
  32. stats[i].perc = (double *)G_malloc(n_perc * sizeof(double));
  33. else
  34. stats[i].perc = NULL;
  35. stats[i].sum_abs = 0.0;
  36. stats[i].n = 0;
  37. stats[i].size = 0;
  38. stats[i].dcell_array = NULL;
  39. stats[i].fcell_array = NULL;
  40. stats[i].cell_array = NULL;
  41. stats[i].map_type = map_type;
  42. stats[i].n_alloc = 0;
  43. stats[i].first = TRUE;
  44. /* allocate memory for extended computation */
  45. /* changed to on-demand block allocation */
  46. /* if (param.extended->answer) {
  47. if (map_type == DCELL_TYPE) {
  48. stats[i].dcell_array = NULL;
  49. }
  50. else if (map_type == FCELL_TYPE) {
  51. stats[i].fcell_array =NULL;
  52. }
  53. else if (map_type == CELL_TYPE) {
  54. stats[i].cell_array = NULL;
  55. }
  56. }
  57. */
  58. }
  59. return stats;
  60. }
  61. /* *************************************************************** */
  62. /* **** univar_stat destructor *********************************** */
  63. /* *************************************************************** */
  64. void free_univar_stat_struct(univar_stat * stats)
  65. {
  66. int i;
  67. int n_zones = zone_info.n_zones;
  68. if (n_zones == 0)
  69. n_zones = 1;
  70. for (i = 0; i < n_zones; i++){
  71. if (stats[i].perc)
  72. G_free(stats[i].perc);
  73. if (stats[i].dcell_array)
  74. G_free(stats[i].dcell_array);
  75. if (stats[i].fcell_array)
  76. G_free(stats[i].fcell_array);
  77. if (stats[i].cell_array)
  78. G_free(stats[i].cell_array);
  79. }
  80. G_free(stats);
  81. return;
  82. }
  83. /* *************************************************************** */
  84. /* **** compute and print univar statistics to stdout ************ */
  85. /* *************************************************************** */
  86. int print_stats(univar_stat * stats)
  87. {
  88. int z, n_zones = zone_info.n_zones;
  89. if (n_zones == 0)
  90. n_zones = 1;
  91. for (z = 0; z < n_zones; z++) {
  92. char sum_str[100];
  93. double mean, variance, stdev, var_coef;
  94. /* for extendet stats */
  95. double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
  96. double median = 0.0;
  97. unsigned int i;
  98. int qpos_25, qpos_75, *qpos_perc;
  99. /* stats collected for this zone? */
  100. if (stats[z].n == 0)
  101. continue;
  102. /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
  103. mean = stats[z].sum / stats[z].n;
  104. variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
  105. if (variance < GRASS_EPSILON)
  106. variance = 0.0;
  107. stdev = sqrt(variance);
  108. var_coef = (stdev / mean) * 100.; /* perhaps stdev/fabs(mean) ? */
  109. sprintf(sum_str, "%.10f", stats[z].sum);
  110. G_trim_decimal(sum_str);
  111. if (zone_info.n_zones) {
  112. int z_cat = z + zone_info.min;
  113. fprintf(stdout, "\nzone %d %s\n\n", z_cat, Rast_get_c_cat(&z_cat, &(zone_info.cats)));
  114. }
  115. if (!param.shell_style->answer) {
  116. fprintf(stdout, "total null and non-null cells: %lu\n", stats[z].size);
  117. fprintf(stdout, "total null cells: %lu\n\n", stats[z].size - stats[z].n);
  118. fprintf(stdout, "Of the non-null cells:\n----------------------\n");
  119. }
  120. if (param.shell_style->answer) {
  121. fprintf(stdout, "n=%lu\n", stats[z].n);
  122. fprintf(stdout, "null_cells=%lu\n", stats[z].size - stats[z].n);
  123. fprintf(stdout, "cells=%lu\n", stats->size);
  124. fprintf(stdout, "min=%.15g\n", stats[z].min);
  125. fprintf(stdout, "max=%.15g\n", stats[z].max);
  126. fprintf(stdout, "range=%.15g\n", stats[z].max - stats[z].min);
  127. fprintf(stdout, "mean=%.15g\n", mean);
  128. fprintf(stdout, "mean_of_abs=%.15g\n", stats[z].sum_abs / stats[z].n);
  129. fprintf(stdout, "stddev=%.15g\n", stdev);
  130. fprintf(stdout, "variance=%.15g\n", variance);
  131. fprintf(stdout, "coeff_var=%.15g\n", var_coef);
  132. fprintf(stdout, "sum=%s\n", sum_str);
  133. }
  134. else {
  135. fprintf(stdout, "n: %lu\n", stats[z].n);
  136. fprintf(stdout, "minimum: %g\n", stats[z].min);
  137. fprintf(stdout, "maximum: %g\n", stats[z].max);
  138. fprintf(stdout, "range: %g\n", stats[z].max - stats[z].min);
  139. fprintf(stdout, "mean: %g\n", mean);
  140. fprintf(stdout, "mean of absolute values: %g\n",
  141. stats[z].sum_abs / stats[z].n);
  142. fprintf(stdout, "standard deviation: %g\n", stdev);
  143. fprintf(stdout, "variance: %g\n", variance);
  144. fprintf(stdout, "variation coefficient: %g %%\n", var_coef);
  145. fprintf(stdout, "sum: %s\n", sum_str);
  146. }
  147. /* TODO: mode, skewness, kurtosis */
  148. if (param.extended->answer) {
  149. qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
  150. quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
  151. for (i = 0; i < stats[z].n_perc; i++) {
  152. qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
  153. }
  154. qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
  155. qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
  156. switch (stats[z].map_type) {
  157. case CELL_TYPE:
  158. heapsort_int(stats[z].cell_array, stats[z].n);
  159. quartile_25 = (double)stats[z].cell_array[qpos_25];
  160. if (stats[z].n % 2) /* odd */
  161. median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
  162. else /* even */
  163. median =
  164. (double)(stats[z].cell_array[stats[z].n / 2 - 1] +
  165. stats[z].cell_array[stats[z].n / 2]) / 2.0;
  166. quartile_75 = (double)stats[z].cell_array[qpos_75];
  167. for (i = 0; i < stats[z].n_perc; i++) {
  168. quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
  169. }
  170. break;
  171. case FCELL_TYPE:
  172. heapsort_float(stats[z].fcell_array, stats[z].n);
  173. quartile_25 = (double)stats[z].fcell_array[qpos_25];
  174. if (stats[z].n % 2) /* odd */
  175. median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
  176. else /* even */
  177. median =
  178. (double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
  179. stats[z].fcell_array[stats[z].n / 2]) / 2.0;
  180. quartile_75 = (double)stats[z].fcell_array[qpos_75];
  181. for (i = 0; i < stats[z].n_perc; i++) {
  182. quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
  183. }
  184. break;
  185. case DCELL_TYPE:
  186. heapsort_double(stats[z].dcell_array, stats[z].n);
  187. quartile_25 = stats[z].dcell_array[qpos_25];
  188. if (stats[z].n % 2) /* odd */
  189. median = stats[z].dcell_array[(int)(stats[z].n / 2)];
  190. else /* even */
  191. median =
  192. (stats[z].dcell_array[stats[z].n / 2 - 1] +
  193. stats[z].dcell_array[stats[z].n / 2]) / 2.0;
  194. quartile_75 = stats[z].dcell_array[qpos_75];
  195. for (i = 0; i < stats[z].n_perc; i++) {
  196. quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
  197. }
  198. break;
  199. default:
  200. break;
  201. }
  202. if (param.shell_style->answer) {
  203. fprintf(stdout, "first_quartile=%g\n", quartile_25);
  204. fprintf(stdout, "median=%g\n", median);
  205. fprintf(stdout, "third_quartile=%g\n", quartile_75);
  206. for (i = 0; i < stats[z].n_perc; i++) {
  207. char buf[24];
  208. sprintf(buf, "%.15g", stats[z].perc[i]);
  209. G_strchg(buf, '.', '_');
  210. fprintf(stdout, "percentile_%s=%g\n", buf,
  211. quartile_perc[i]);
  212. }
  213. }
  214. else {
  215. fprintf(stdout, "1st quartile: %g\n", quartile_25);
  216. if (stats[z].n % 2)
  217. fprintf(stdout, "median (odd number of cells): %g\n", median);
  218. else
  219. fprintf(stdout, "median (even number of cells): %g\n",
  220. median);
  221. fprintf(stdout, "3rd quartile: %g\n", quartile_75);
  222. for (i = 0; i < stats[z].n_perc; i++) {
  223. if (stats[z].perc[i] == (int)stats[z].perc[i]) {
  224. /* percentile is an exact integer */
  225. if ((int)stats[z].perc[i] % 10 == 1 && (int)stats[z].perc[i] != 11)
  226. fprintf(stdout, "%dst percentile: %g\n", (int)stats[z].perc[i],
  227. quartile_perc[i]);
  228. else if ((int)stats[z].perc[i] % 10 == 2 && (int)stats[z].perc[i] != 12)
  229. fprintf(stdout, "%dnd percentile: %g\n", (int)stats[z].perc[i],
  230. quartile_perc[i]);
  231. else if ((int)stats[z].perc[i] % 10 == 3 && (int)stats[z].perc[i] != 13)
  232. fprintf(stdout, "%drd percentile: %g\n", (int)stats[z].perc[i],
  233. quartile_perc[i]);
  234. else
  235. fprintf(stdout, "%dth percentile: %g\n", (int)stats[z].perc[i],
  236. quartile_perc[i]);
  237. }
  238. else {
  239. /* percentile is not an exact integer */
  240. fprintf(stdout, "%.15g percentile: %g\n", stats[z].perc[i],
  241. quartile_perc[i]);
  242. }
  243. }
  244. }
  245. G_free((void *)quartile_perc);
  246. G_free((void *)qpos_perc);
  247. }
  248. /* G_message() prints to stderr not stdout: disabled. this \n is printed above with zone */
  249. /* if (!(param.shell_style->answer))
  250. G_message("\n"); */
  251. }
  252. return 1;
  253. }
  254. int print_stats_table(univar_stat * stats)
  255. {
  256. unsigned int i;
  257. int z, n_zones = zone_info.n_zones;
  258. if (n_zones == 0)
  259. n_zones = 1;
  260. /* print column headers */
  261. if (zone_info.n_zones) {
  262. fprintf(stdout, "zone%s", zone_info.sep);
  263. fprintf(stdout, "label%s", zone_info.sep);
  264. }
  265. fprintf(stdout, "non_null_cells%s", zone_info.sep);
  266. fprintf(stdout, "null_cells%s", zone_info.sep);
  267. fprintf(stdout, "min%s", zone_info.sep);
  268. fprintf(stdout, "max%s", zone_info.sep);
  269. fprintf(stdout, "range%s", zone_info.sep);
  270. fprintf(stdout, "mean%s", zone_info.sep);
  271. fprintf(stdout, "mean_of_abs%s", zone_info.sep);
  272. fprintf(stdout, "stddev%s", zone_info.sep);
  273. fprintf(stdout, "variance%s", zone_info.sep);
  274. fprintf(stdout, "coeff_var%s", zone_info.sep);
  275. fprintf(stdout, "sum%s", zone_info.sep);
  276. fprintf(stdout, "sum_abs");
  277. if (param.extended->answer) {
  278. fprintf(stdout, "%sfirst_quart", zone_info.sep);
  279. fprintf(stdout, "%smedian", zone_info.sep);
  280. fprintf(stdout, "%sthird_quart", zone_info.sep);
  281. for (i = 0; i < stats[0].n_perc; i++) {
  282. if (stats[0].perc[i] == (int)stats[0].perc[i]) {
  283. /* percentile is an exact integer */
  284. fprintf(stdout, "%sperc_%d", zone_info.sep, (int)stats[0].perc[i]);
  285. }
  286. else {
  287. /* percentile is not an exact integer */
  288. char buf[24];
  289. sprintf(buf, "%.15g", stats[0].perc[i]);
  290. G_strchg(buf, '.', '_');
  291. fprintf(stdout, "%sperc_%s", zone_info.sep, buf);
  292. }
  293. }
  294. }
  295. fprintf(stdout, "\n");
  296. /* print stats */
  297. for (z = 0; z < n_zones; z++) {
  298. char sum_str[100];
  299. double mean, variance, stdev, var_coef;
  300. /* for extendet stats */
  301. double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
  302. double median = 0.0;
  303. int qpos_25, qpos_75, *qpos_perc;
  304. /* stats collected for this zone? */
  305. if (stats[z].n == 0)
  306. continue;
  307. i = 0;
  308. /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
  309. mean = stats[z].sum / stats[z].n;
  310. variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
  311. if (variance < GRASS_EPSILON)
  312. variance = 0.0;
  313. stdev = sqrt(variance);
  314. var_coef = (stdev / mean) * 100.; /* perhaps stdev/fabs(mean) ? */
  315. if (zone_info.n_zones) {
  316. int z_cat = z + zone_info.min;
  317. /* zone number */
  318. fprintf(stdout, "%d%s", z + zone_info.min, zone_info.sep);
  319. /* zone label */
  320. fprintf(stdout,"%s%s", Rast_get_c_cat(&z_cat, &(zone_info.cats)), zone_info.sep);
  321. }
  322. /* non-null cells cells */
  323. fprintf(stdout, "%lu%s", stats[z].n, zone_info.sep);
  324. /* null cells */
  325. fprintf(stdout, "%lu%s", stats[z].size - stats[z].n, zone_info.sep);
  326. /* min */
  327. fprintf(stdout, "%.15g%s", stats[z].min, zone_info.sep);
  328. /* max */
  329. fprintf(stdout, "%.15g%s", stats[z].max, zone_info.sep);
  330. /* range */
  331. fprintf(stdout, "%.15g%s", stats[z].max - stats[z].min, zone_info.sep);
  332. /* mean */
  333. fprintf(stdout, "%.15g%s", mean, zone_info.sep);
  334. /* mean of abs */
  335. fprintf(stdout, "%.15g%s", stats[z].sum_abs / stats[z].n, zone_info.sep);
  336. /* stddev */
  337. fprintf(stdout, "%.15g%s", stdev, zone_info.sep);
  338. /* variance */
  339. fprintf(stdout, "%.15g%s", variance, zone_info.sep);
  340. /* coefficient of variance */
  341. fprintf(stdout, "%.15g%s", var_coef, zone_info.sep);
  342. /* sum */
  343. sprintf(sum_str, "%.10f", stats[z].sum);
  344. G_trim_decimal(sum_str);
  345. fprintf(stdout, "%s%s", sum_str, zone_info.sep);
  346. /* absolute sum */
  347. sprintf(sum_str, "%.10f", stats[z].sum_abs);
  348. G_trim_decimal(sum_str);
  349. fprintf(stdout, "%s", sum_str);
  350. /* TODO: mode, skewness, kurtosis */
  351. if (param.extended->answer) {
  352. qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
  353. quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
  354. for (i = 0; i < stats[z].n_perc; i++) {
  355. qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
  356. }
  357. qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
  358. qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
  359. switch (stats[z].map_type) {
  360. case CELL_TYPE:
  361. heapsort_int(stats[z].cell_array, stats[z].n);
  362. quartile_25 = (double)stats[z].cell_array[qpos_25];
  363. if (stats[z].n % 2) /* odd */
  364. median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
  365. else /* even */
  366. median =
  367. (double)(stats[z].cell_array[stats[z].n / 2 - 1] +
  368. stats[z].cell_array[stats[z].n / 2]) / 2.0;
  369. quartile_75 = (double)stats[z].cell_array[qpos_75];
  370. for (i = 0; i < stats[z].n_perc; i++) {
  371. quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
  372. }
  373. break;
  374. case FCELL_TYPE:
  375. heapsort_float(stats[z].fcell_array, stats[z].n);
  376. quartile_25 = (double)stats[z].fcell_array[qpos_25];
  377. if (stats[z].n % 2) /* odd */
  378. median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
  379. else /* even */
  380. median =
  381. (double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
  382. stats[z].fcell_array[stats[z].n / 2]) / 2.0;
  383. quartile_75 = (double)stats[z].fcell_array[qpos_75];
  384. for (i = 0; i < stats[z].n_perc; i++) {
  385. quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
  386. }
  387. break;
  388. case DCELL_TYPE:
  389. heapsort_double(stats[z].dcell_array, stats[z].n);
  390. quartile_25 = stats[z].dcell_array[qpos_25];
  391. if (stats[z].n % 2) /* odd */
  392. median = stats[z].dcell_array[(int)(stats[z].n / 2)];
  393. else /* even */
  394. median =
  395. (stats[z].dcell_array[stats[z].n / 2 - 1] +
  396. stats[z].dcell_array[stats[z].n / 2]) / 2.0;
  397. quartile_75 = stats[z].dcell_array[qpos_75];
  398. for (i = 0; i < stats[z].n_perc; i++) {
  399. quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
  400. }
  401. break;
  402. default:
  403. break;
  404. }
  405. /* first quartile */
  406. fprintf(stdout, "%s%g", zone_info.sep, quartile_25);
  407. /* median */
  408. fprintf(stdout, "%s%g", zone_info.sep, median);
  409. /* third quartile */
  410. fprintf(stdout, "%s%g", zone_info.sep, quartile_75);
  411. /* percentiles */
  412. for (i = 0; i < stats[z].n_perc; i++) {
  413. fprintf(stdout, "%s%g", zone_info.sep ,
  414. quartile_perc[i]);
  415. }
  416. G_free((void *)quartile_perc);
  417. G_free((void *)qpos_perc);
  418. }
  419. fprintf(stdout, "\n");
  420. /* zone z finished */
  421. }
  422. return 1;
  423. }