main.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. /***************************************************************
  2. *
  3. * MODULE: v.univar
  4. *
  5. * AUTHOR(S): Radim Blazek
  6. * Hamish Bowman, University of Otago, New Zealand (r.univar2)
  7. * Martin Landa (extended stats & OGR support)
  8. *
  9. * PURPOSE: Univariate Statistics for attribute
  10. *
  11. * COPYRIGHT: (C) 2004-2014 by the GRASS Development Team
  12. *
  13. * This program is free software under the GNU General
  14. * Public License (>=v2). Read the file COPYING that
  15. * comes with GRASS for details.
  16. *
  17. **************************************************************/
  18. /* TODO
  19. * - add flag to weigh by line/boundary length and area size
  20. * Roger Bivand on GRASS devel ml on July 2 2004
  21. * http://lists.osgeo.org/pipermail/grass-dev/2004-July/014976.html
  22. * "[...] calculating weighted means, weighting by line length
  23. * or area surface size [does not make sense]. I think it would be
  24. * better to treat each line or area as a discrete, unweighted, unit
  25. * unless some reason to the contrary is given, just like points/sites.
  26. * It is probably more important to handle missing data gracefully
  27. * than weight the means or other statistics, I think. There may be
  28. * reasons to weigh sometimes, but most often I see ratios or rates of
  29. * two variables, rather than of a single variable and length or area."
  30. * MM 04 2013: Done
  31. *
  32. * - use geodesic distances for the geometry option (distance to closest
  33. * other feature)
  34. *
  35. * - use sample instead of population mean/stddev
  36. * */
  37. #include <stdlib.h>
  38. #include <string.h>
  39. #include <math.h>
  40. #include <grass/gis.h>
  41. #include <grass/vector.h>
  42. #include <grass/dbmi.h>
  43. #include <grass/glocale.h>
  44. static void select_from_geometry(void);
  45. static void select_from_database(void);
  46. static void summary(void);
  47. struct Option *field_opt, *where_opt, *col_opt;
  48. struct Flag *shell_flag, *ext_flag, *weight_flag, *geometry;
  49. struct Map_info Map;
  50. struct line_cats *Cats;
  51. struct field_info *Fi;
  52. dbDriver *Driver;
  53. dbCatValArray Cvarr;
  54. int otype, ofield;
  55. int compatible = 1; /* types are compatible: point+centroid or line+boundary or area */
  56. int nmissing = 0; /* number of missing atttributes */
  57. int nnull = 0; /* number of null values */
  58. int nzero = 0; /* number of zero distances */
  59. int first = 1;
  60. /* Statistics */
  61. int count = 0; /* number of features with non-null attribute */
  62. int nlines; /* number of primitives */
  63. double sum = 0.0;
  64. double sumsq = 0.0;
  65. double sumcb = 0.0;
  66. double sumqt = 0.0;
  67. double sum_abs = 0.0;
  68. double min = 0.0 / 0.0; /* init as nan */
  69. double max = 0.0 / 0.0;
  70. double mean, mean_abs, pop_variance, sample_variance, pop_stdev,
  71. sample_stdev, pop_coeff_variation, kurtosis, skewness;
  72. double total_size = 0.0; /* total size: length/area */
  73. /* Extended statistics */
  74. int perc;
  75. int main(int argc, char *argv[])
  76. {
  77. struct GModule *module;
  78. struct Option *map_opt, *type_opt,
  79. *percentile;
  80. module = G_define_module();
  81. G_add_keyword(_("vector"));
  82. G_add_keyword(_("statistics"));
  83. G_add_keyword(_("univariate statistics"));
  84. G_add_keyword(_("attribute table"));
  85. G_add_keyword(_("geometry"));
  86. module->label =
  87. _("Calculates univariate statistics of vector map features.");
  88. module->description = _("Variance and standard "
  89. "deviation is calculated only for points if specified.");
  90. map_opt = G_define_standard_option(G_OPT_V_MAP);
  91. field_opt = G_define_standard_option(G_OPT_V_FIELD);
  92. type_opt = G_define_standard_option(G_OPT_V_TYPE);
  93. type_opt->options = "point,line,boundary,centroid,area";
  94. type_opt->answer = "point,line,area";
  95. col_opt = G_define_standard_option(G_OPT_DB_COLUMN);
  96. col_opt->required = NO;
  97. where_opt = G_define_standard_option(G_OPT_DB_WHERE);
  98. percentile = G_define_option();
  99. percentile->key = "percentile";
  100. percentile->type = TYPE_INTEGER;
  101. percentile->required = NO;
  102. percentile->options = "0-100";
  103. percentile->answer = "90";
  104. percentile->description =
  105. _("Percentile to calculate (requires extended statistics flag)");
  106. shell_flag = G_define_flag();
  107. shell_flag->key = 'g';
  108. shell_flag->description = _("Print the stats in shell script style");
  109. ext_flag = G_define_flag();
  110. ext_flag->key = 'e';
  111. ext_flag->description = _("Calculate extended statistics");
  112. weight_flag = G_define_flag();
  113. weight_flag->key = 'w';
  114. weight_flag->description = _("Weigh by line length or area size");
  115. geometry = G_define_flag();
  116. geometry->key = 'd';
  117. geometry->description = _("Calculate geometric distances instead of attribute statistics");
  118. G_gisinit(argv[0]);
  119. if (G_parser(argc, argv))
  120. exit(EXIT_FAILURE);
  121. /* Only require col_opt answer if -g flag is not set */
  122. if (!col_opt->answer && !geometry->answer) {
  123. G_fatal_error(_("Required parameter <%s> not set:\n\t(%s)"), col_opt->key, col_opt->description);
  124. }
  125. otype = Vect_option_to_types(type_opt);
  126. perc = atoi(percentile->answer);
  127. Cats = Vect_new_cats_struct();
  128. /* open input vector */
  129. Vect_set_open_level(2);
  130. Vect_open_old2(&Map, map_opt->answer, "", field_opt->answer);
  131. ofield = Vect_get_field_number(&Map, field_opt->answer);
  132. if ((otype & GV_POINT) && Vect_get_num_primitives(&Map, GV_POINT) == 0) {
  133. otype &= ~(GV_POINT);
  134. if (otype & GV_POINT)
  135. G_fatal_error("Bye");
  136. }
  137. if ((otype & GV_CENTROID) && Vect_get_num_primitives(&Map, GV_CENTROID) == 0) {
  138. otype &= ~(GV_CENTROID);
  139. }
  140. if ((otype & GV_LINE) && Vect_get_num_primitives(&Map, GV_LINE) == 0) {
  141. otype &= ~(GV_LINE);
  142. }
  143. if ((otype & GV_BOUNDARY) && Vect_get_num_primitives(&Map, GV_BOUNDARY) == 0) {
  144. otype &= ~(GV_BOUNDARY);
  145. }
  146. if ((otype & GV_AREA) && Vect_get_num_areas(&Map) == 0) {
  147. otype &= ~(GV_AREA);
  148. }
  149. /* Check if types are compatible */
  150. if ((otype & GV_POINTS) && ((otype & GV_LINES) || (otype & GV_AREA)))
  151. compatible = 0;
  152. if ((otype & GV_LINES) && (otype & GV_AREA))
  153. compatible = 0;
  154. if (!compatible && geometry->answer)
  155. compatible = 1; /* distances is compatible with GV_POINTS and GV_LINES */
  156. if (!compatible && !weight_flag->answer)
  157. compatible = 1; /* attributes are always compatible without weight */
  158. if (geometry->answer && (otype & GV_AREA))
  159. G_fatal_error(_("Geometry distances are not supported for areas. Use '%s' instead."), "v.distance");
  160. if (!compatible) {
  161. G_warning(_("Incompatible vector type(s) specified, only number of features, minimum, maximum and range "
  162. "can be calculated"));
  163. }
  164. if (ext_flag->answer && (!(otype & GV_POINTS) || geometry->answer)) {
  165. G_warning(_("Extended statistics is currently supported only for points/centroids"));
  166. }
  167. if (geometry->answer)
  168. select_from_geometry();
  169. else
  170. select_from_database();
  171. summary();
  172. Vect_close(&Map);
  173. exit(EXIT_SUCCESS);
  174. }
  175. void select_from_geometry(void)
  176. {
  177. int i, j, k, ncats, *cats;
  178. int type;
  179. struct line_pnts *iPoints, *jPoints;
  180. iPoints = Vect_new_line_struct();
  181. jPoints = Vect_new_line_struct();
  182. if (where_opt->answer != NULL) {
  183. if (ofield < 1) {
  184. G_fatal_error(_("'layer' must be > 0 for 'where'."));
  185. }
  186. Fi = Vect_get_field(&Map, ofield);
  187. if (!Fi) {
  188. G_fatal_error(_("Database connection not defined for layer %d"),
  189. ofield);
  190. }
  191. Driver = db_start_driver_open_database(Fi->driver, Fi->database);
  192. if (Driver == NULL)
  193. G_fatal_error("Unable to open database <%s> by driver <%s>",
  194. Fi->database, Fi->driver);
  195. db_set_error_handler_driver(Driver);
  196. ncats = db_select_int(Driver, Fi->table, Fi->key, where_opt->answer,
  197. &cats);
  198. if (ncats == -1)
  199. G_fatal_error(_("Unable select categories from table <%s>"), Fi->table);
  200. db_close_database_shutdown_driver(Driver);
  201. }
  202. else
  203. ncats = 0;
  204. count = 0;
  205. nlines = Vect_get_num_lines(&Map);
  206. G_message(_("Calculating geometric distances between %d primitives..."), nlines);
  207. /* Start calculating the statistics based on distance to all other primitives.
  208. Use the centroid of areas and the first point of lines */
  209. for (i = 1; i <= nlines; i++) {
  210. G_percent(i, nlines, 2);
  211. type = Vect_read_line(&Map, iPoints, Cats, i);
  212. if (!(type & otype))
  213. continue;
  214. if (where_opt->answer) {
  215. int ok = FALSE;
  216. for (j = 0; j < Cats->n_cats; j++) {
  217. if (Vect_cat_in_array(Cats->cat[j], cats, ncats)) {
  218. ok = TRUE;
  219. break;
  220. }
  221. }
  222. if (!ok)
  223. continue;
  224. }
  225. for (j = i + 1; j < nlines; j++) {
  226. /* get distance to this object */
  227. double val = 0.0;
  228. type = Vect_read_line(&Map, jPoints, Cats, j);
  229. if (!(type & otype))
  230. continue;
  231. /* now calculate the min distance between each point in line i with line j */
  232. for (k = 0; k < iPoints->n_points; k++) {
  233. double dmin = 0.0;
  234. Vect_line_distance(jPoints, iPoints->x[k], iPoints->y[k], iPoints->z[k], 1,
  235. NULL, NULL, NULL, &dmin, NULL, NULL);
  236. if((k == 0) || (dmin < val)) {
  237. val = dmin;
  238. }
  239. }
  240. if (val > 0 && iPoints->n_points > 1) {
  241. for (k = 0; k < jPoints->n_points; k++) {
  242. double dmin = 0.0;
  243. Vect_line_distance(iPoints, jPoints->x[k], jPoints->y[k], jPoints->z[k], 1,
  244. NULL, NULL, NULL, &dmin, NULL, NULL);
  245. if(dmin < val) {
  246. val = dmin;
  247. }
  248. }
  249. }
  250. if (val > 0 && iPoints->n_points > 1 && jPoints->n_points > 1) {
  251. if (Vect_line_check_intersection(iPoints, jPoints, Vect_is_3d(&Map)))
  252. val = 0;
  253. }
  254. if (val == 0) {
  255. nzero++;
  256. continue;
  257. }
  258. if (first) {
  259. max = val;
  260. min = val;
  261. first = 0;
  262. }
  263. else {
  264. if (val > max)
  265. max = val;
  266. if (val < min)
  267. min = val;
  268. }
  269. sum += val;
  270. sumsq += val * val;
  271. sumcb += val * val * val;
  272. sumqt += val * val * val * val;
  273. sum_abs += fabs(val);
  274. count++;
  275. G_debug(3, "i=%d j=%d sum = %f val=%f", i, j, sum, val);
  276. }
  277. }
  278. }
  279. void select_from_database(void)
  280. {
  281. int nrec, ctype, nlines, line, nareas, area;
  282. struct line_pnts *Points;
  283. Fi = Vect_get_field(&Map, ofield);
  284. if (Fi == NULL) {
  285. G_fatal_error(_(" Database connection not defined for layer <%s>"), field_opt->answer);
  286. }
  287. Driver = db_start_driver_open_database(Fi->driver, Fi->database);
  288. if (Driver == NULL)
  289. G_fatal_error("Unable to open database <%s> by driver <%s>",
  290. Fi->database, Fi->driver);
  291. db_set_error_handler_driver(Driver);
  292. /* check if column exists */
  293. ctype = db_column_Ctype(Driver, Fi->table, col_opt->answer);
  294. if (ctype == -1)
  295. G_fatal_error(_("Column <%s> not found in table <%s>"),
  296. col_opt->answer, Fi->table);
  297. if (ctype != DB_C_TYPE_INT && ctype != DB_C_TYPE_DOUBLE)
  298. G_fatal_error(_("Only numeric column type is supported"));
  299. /* Note do not check if the column exists in the table because it may be an expression */
  300. db_CatValArray_init(&Cvarr);
  301. nrec = db_select_CatValArray(Driver, Fi->table, Fi->key, col_opt->answer,
  302. where_opt->answer, &Cvarr);
  303. G_debug(2, "db_select_CatValArray() nrec = %d", nrec);
  304. if (nrec < 0)
  305. G_fatal_error(_("Unable to select data from table"));
  306. db_close_database_shutdown_driver(Driver);
  307. Points = Vect_new_line_struct();
  308. /* Lines */
  309. nlines = 0;
  310. if ((otype & GV_POINTS) || (otype & GV_LINES))
  311. nlines = Vect_get_num_lines(&Map);
  312. G_debug(1, "select_from_database: %d points", nlines);
  313. for (line = 1; line <= nlines; line++) {
  314. int i, type;
  315. G_debug(3, "line = %d", line);
  316. G_percent(line, nlines, 2);
  317. type = Vect_read_line(&Map, Points, Cats, line);
  318. if (!(type & otype))
  319. continue;
  320. for (i = 0; i < Cats->n_cats; i++) {
  321. if (Cats->field[i] == ofield) {
  322. double val = 0.0;
  323. dbCatVal *catval;
  324. G_debug(3, "cat = %d", Cats->cat[i]);
  325. if (db_CatValArray_get_value(&Cvarr, Cats->cat[i], &catval) !=
  326. DB_OK) {
  327. G_debug(3, "No record for cat = %d", Cats->cat[i]);
  328. nmissing++;
  329. continue;
  330. }
  331. if (catval->isNull) {
  332. G_debug(3, "NULL value for cat = %d", Cats->cat[i]);
  333. nnull++;
  334. continue;
  335. }
  336. if (ctype == DB_C_TYPE_INT) {
  337. val = catval->val.i;
  338. }
  339. else if (ctype == DB_C_TYPE_DOUBLE) {
  340. val = catval->val.d;
  341. }
  342. count++;
  343. if (first) {
  344. max = val;
  345. min = val;
  346. first = 0;
  347. }
  348. else {
  349. if (val > max)
  350. max = val;
  351. if (val < min)
  352. min = val;
  353. }
  354. if (compatible) {
  355. if (type & GV_POINTS) {
  356. sum += val;
  357. sumsq += val * val;
  358. sumcb += val * val * val;
  359. sumqt += val * val * val * val;
  360. sum_abs += fabs(val);
  361. }
  362. else if (type & GV_LINES) { /* GV_LINES */
  363. double l = 1.;
  364. if (weight_flag->answer)
  365. l = Vect_line_length(Points);
  366. sum += l * val;
  367. sumsq += l * val * val;
  368. sumcb += l * val * val * val;
  369. sumqt += l * val * val * val * val;
  370. sum_abs += l * fabs(val);
  371. total_size += l;
  372. }
  373. }
  374. G_debug(3, "sum = %f total_size = %f", sum, total_size);
  375. }
  376. }
  377. }
  378. if (otype & GV_AREA) {
  379. nareas = Vect_get_num_areas(&Map);
  380. for (area = 1; area <= nareas; area++) {
  381. int i, centr;
  382. G_debug(3, "area = %d", area);
  383. centr = Vect_get_area_centroid(&Map, area);
  384. if (centr < 1)
  385. continue;
  386. G_debug(3, "centr = %d", centr);
  387. Vect_read_line(&Map, NULL, Cats, centr);
  388. for (i = 0; i < Cats->n_cats; i++) {
  389. if (Cats->field[i] == ofield) {
  390. double val = 0.0;
  391. dbCatVal *catval;
  392. G_debug(3, "cat = %d", Cats->cat[i]);
  393. if (db_CatValArray_get_value
  394. (&Cvarr, Cats->cat[i], &catval) != DB_OK) {
  395. G_debug(3, "No record for cat = %d", Cats->cat[i]);
  396. nmissing++;
  397. continue;
  398. }
  399. if (catval->isNull) {
  400. G_debug(3, "NULL value for cat = %d", Cats->cat[i]);
  401. nnull++;
  402. continue;
  403. }
  404. if (ctype == DB_C_TYPE_INT) {
  405. val = catval->val.i;
  406. }
  407. else if (ctype == DB_C_TYPE_DOUBLE) {
  408. val = catval->val.d;
  409. }
  410. count++;
  411. if (first) {
  412. max = val;
  413. min = val;
  414. first = 0;
  415. }
  416. else {
  417. if (val > max)
  418. max = val;
  419. if (val < min)
  420. min = val;
  421. }
  422. if (compatible) {
  423. double a = 1.;
  424. if (weight_flag->answer)
  425. a = Vect_get_area_area(&Map, area);
  426. sum += a * val;
  427. sumsq += a * val * val;
  428. sumcb += a * val * val * val;
  429. sumqt += a * val * val * val * val;
  430. sum_abs += a * fabs(val);
  431. total_size += a;
  432. }
  433. G_debug(4, "sum = %f total_size = %f", sum, total_size);
  434. }
  435. }
  436. }
  437. }
  438. G_debug(2, "sum = %f total_size = %f", sum, total_size);
  439. }
  440. void summary(void)
  441. {
  442. if (compatible) {
  443. if (!geometry->answer && weight_flag->answer) {
  444. mean = sum / total_size;
  445. mean_abs = sum_abs / total_size;
  446. /* Roger Bivand says it is wrong see GRASS devel list 7/2004 */
  447. /*
  448. pop_variance = (sumsq - sum * sum / total_size) / total_size;
  449. pop_stdev = sqrt(pop_variance);
  450. pop_coeff_variation = pop_stdev / (sqrt(sum * sum) / count);
  451. */
  452. }
  453. else {
  454. double n = count;
  455. mean = sum / count;
  456. mean_abs = sum_abs / count;
  457. pop_variance = (sumsq - sum * sum / count) / count;
  458. pop_stdev = sqrt(pop_variance);
  459. pop_coeff_variation = pop_stdev / (sqrt(sum * sum) / count);
  460. sample_variance = (sumsq - sum * sum / count) / (count - 1);
  461. sample_stdev = sqrt(sample_variance);
  462. kurtosis =
  463. (sumqt / count - 4 * sum * sumcb / (n * n) +
  464. 6 * sum * sum * sumsq / (n * n * n) -
  465. 3 * sum * sum * sum * sum / (n * n * n * n))
  466. / (sample_stdev * sample_stdev * sample_stdev *
  467. sample_stdev) - 3;
  468. skewness =
  469. (sumcb / n - 3 * sum * sumsq / (n * n) +
  470. 2 * sum * sum * sum / (n * n * n))
  471. / (sample_stdev * sample_stdev * sample_stdev);
  472. }
  473. }
  474. G_debug(3, "otype %d:", otype);
  475. if (shell_flag->answer) {
  476. fprintf(stdout, "n=%d\n", count);
  477. if (geometry->answer) {
  478. fprintf(stdout, "nzero=%d\n", nzero);
  479. }
  480. else {
  481. fprintf(stdout, "nmissing=%d\n", nmissing);
  482. fprintf(stdout, "nnull=%d\n", nnull);
  483. }
  484. if (count > 0) {
  485. fprintf(stdout, "min=%g\n", min);
  486. fprintf(stdout, "max=%g\n", max);
  487. fprintf(stdout, "range=%g\n", max - min);
  488. fprintf(stdout, "sum=%g\n", sum);
  489. if (compatible) {
  490. fprintf(stdout, "mean=%g\n", mean);
  491. fprintf(stdout, "mean_abs=%g\n", mean_abs);
  492. if (geometry->answer || !weight_flag->answer) {
  493. fprintf(stdout, "population_stddev=%g\n", pop_stdev);
  494. fprintf(stdout, "population_variance=%g\n", pop_variance);
  495. fprintf(stdout, "population_coeff_variation=%g\n",
  496. pop_coeff_variation);
  497. fprintf(stdout, "sample_stddev=%g\n", sample_stdev);
  498. fprintf(stdout, "sample_variance=%g\n", sample_variance);
  499. fprintf(stdout, "kurtosis=%g\n", kurtosis);
  500. fprintf(stdout, "skewness=%g\n", skewness);
  501. }
  502. }
  503. }
  504. }
  505. else {
  506. if (geometry->answer) {
  507. fprintf(stdout, "number of primitives: %d\n", nlines);
  508. fprintf(stdout, "number of non zero distances: %d\n", count);
  509. fprintf(stdout, "number of zero distances: %d\n", nzero);
  510. }
  511. else {
  512. fprintf(stdout, "number of features with non NULL attribute: %d\n",
  513. count);
  514. fprintf(stdout, "number of missing attributes: %d\n", nmissing);
  515. fprintf(stdout, "number of NULL attributes: %d\n", nnull);
  516. }
  517. if (count > 0) {
  518. fprintf(stdout, "minimum: %g\n", min);
  519. fprintf(stdout, "maximum: %g\n", max);
  520. fprintf(stdout, "range: %g\n", max - min);
  521. fprintf(stdout, "sum: %g\n", sum);
  522. if (compatible) {
  523. fprintf(stdout, "mean: %g\n", mean);
  524. fprintf(stdout, "mean of absolute values: %g\n", mean_abs);
  525. if (geometry->answer || !weight_flag->answer) {
  526. fprintf(stdout, "population standard deviation: %g\n",
  527. pop_stdev);
  528. fprintf(stdout, "population variance: %g\n", pop_variance);
  529. fprintf(stdout, "population coefficient of variation: %g\n",
  530. pop_coeff_variation);
  531. fprintf(stdout, "sample standard deviation: %g\n",
  532. sample_stdev);
  533. fprintf(stdout, "sample variance: %g\n", sample_variance);
  534. fprintf(stdout, "kurtosis: %g\n", kurtosis);
  535. fprintf(stdout, "skewness: %g\n", skewness);
  536. }
  537. }
  538. }
  539. }
  540. /* TODO: mode, skewness, kurtosis */
  541. /* Not possible to calculate for point distance, since we don't collect the population */
  542. if (ext_flag->answer && compatible &&
  543. ((otype & GV_POINTS) || !weight_flag->answer) &&
  544. !geometry->answer && count > 0) {
  545. double quartile_25 = 0.0, quartile_75 = 0.0, quartile_perc = 0.0;
  546. double median = 0.0;
  547. int qpos_25, qpos_75, qpos_perc;
  548. qpos_25 = (int)(count * 0.25 - 0.5);
  549. qpos_75 = (int)(count * 0.75 - 0.5);
  550. qpos_perc = (int)(count * perc / 100. - 0.5);
  551. if (db_CatValArray_sort_by_value(&Cvarr) != DB_OK)
  552. G_fatal_error(_("Cannot sort the key/value array"));
  553. if (Cvarr.ctype == DB_C_TYPE_INT) {
  554. quartile_25 = (Cvarr.value[qpos_25]).val.i;
  555. if (count % 2) /* odd */
  556. median = (Cvarr.value[(int)(count / 2)]).val.i;
  557. else /* even */
  558. median =
  559. ((Cvarr.value[count / 2 - 1]).val.i +
  560. (Cvarr.value[count / 2]).val.i) / 2.0;
  561. quartile_75 = (Cvarr.value[qpos_75]).val.i;
  562. quartile_perc = (Cvarr.value[qpos_perc]).val.i;
  563. }
  564. else { /* must be DB_C_TYPE_DOUBLE */
  565. quartile_25 = (Cvarr.value[qpos_25]).val.d;
  566. if (count % 2) /* odd */
  567. median = (Cvarr.value[(int)(count / 2)]).val.d;
  568. else /* even */
  569. median =
  570. ((Cvarr.value[count / 2 - 1]).val.d +
  571. (Cvarr.value[count / 2]).val.d) / 2.0;
  572. quartile_75 = (Cvarr.value[qpos_75]).val.d;
  573. quartile_perc = (Cvarr.value[qpos_perc]).val.d;
  574. }
  575. if (shell_flag->answer) {
  576. fprintf(stdout, "first_quartile=%g\n", quartile_25);
  577. fprintf(stdout, "median=%g\n", median);
  578. fprintf(stdout, "third_quartile=%g\n", quartile_75);
  579. fprintf(stdout, "percentile_%d=%g\n", perc, quartile_perc);
  580. }
  581. else {
  582. fprintf(stdout, "1st quartile: %g\n", quartile_25);
  583. if (count % 2)
  584. fprintf(stdout, "median (odd number of cells): %g\n", median);
  585. else
  586. fprintf(stdout, "median (even number of cells): %g\n",
  587. median);
  588. fprintf(stdout, "3rd quartile: %g\n", quartile_75);
  589. if (perc % 10 == 1 && perc != 11)
  590. fprintf(stdout, "%dst percentile: %g\n", perc, quartile_perc);
  591. else if (perc % 10 == 2 && perc != 12)
  592. fprintf(stdout, "%dnd percentile: %g\n", perc, quartile_perc);
  593. else if (perc % 10 == 3 && perc != 13)
  594. fprintf(stdout, "%drd percentile: %g\n", perc, quartile_perc);
  595. else
  596. fprintf(stdout, "%dth percentile: %g\n", perc, quartile_perc);
  597. }
  598. }
  599. }