main.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. /***************************************************************
  2. *
  3. * MODULE: v.univar
  4. *
  5. * AUTHOR(S): Radim Blazek
  6. * Hamish Bowman, University of Otago, New Zealand (r.univar2)
  7. * Martin Landa (extended stats & OGR support)
  8. *
  9. * PURPOSE: Univariate Statistics for attribute
  10. *
  11. * COPYRIGHT: (C) 2004-2014 by the GRASS Development Team
  12. *
  13. * This program is free software under the GNU General
  14. * Public License (>=v2). Read the file COPYING that
  15. * comes with GRASS for details.
  16. *
  17. **************************************************************/
  18. /* TODO
  19. * - add flag to weigh by line/boundary length and area size
  20. * Roger Bivand on GRASS devel ml on July 2 2004
  21. * http://lists.osgeo.org/pipermail/grass-dev/2004-July/014976.html
  22. * "[...] calculating weighted means, weighting by line length
  23. * or area surface size [does not make sense]. I think it would be
  24. * better to treat each line or area as a discrete, unweighted, unit
  25. * unless some reason to the contrary is given, just like points/sites.
  26. * It is probably more important to handle missing data gracefully
  27. * than weight the means or other statistics, I think. There may be
  28. * reasons to weigh sometimes, but most often I see ratios or rates of
  29. * two variables, rather than of a single variable and length or area."
  30. * MM 04 2013: Done
  31. *
  32. * - use geodesic distances for the geometry option (distance to closest
  33. * other feature)
  34. *
  35. * - use sample instead of population mean/stddev
  36. * */
  37. #include <stdlib.h>
  38. #include <string.h>
  39. #include <math.h>
  40. #include <grass/gis.h>
  41. #include <grass/vector.h>
  42. #include <grass/dbmi.h>
  43. #include <grass/glocale.h>
  44. static void select_from_geometry(void);
  45. static void select_from_database(void);
  46. static void summary(void);
  47. struct Option *field_opt, *where_opt, *col_opt;
  48. struct Flag *shell_flag, *ext_flag, *weight_flag, *geometry;
  49. struct Map_info Map;
  50. struct line_cats *Cats;
  51. struct field_info *Fi;
  52. dbDriver *Driver;
  53. dbCatValArray Cvarr;
  54. int otype, ofield;
  55. int compatible = 1; /* types are compatible: point+centroid or line+boundary or area */
  56. int nmissing = 0; /* number of missing atttributes */
  57. int nnull = 0; /* number of null values */
  58. int nzero = 0; /* number of zero distances */
  59. int first = 1;
  60. /* Statistics */
  61. int count = 0; /* number of features with non-null attribute */
  62. int nlines; /* number of primitives */
  63. double sum = 0.0;
  64. double sumsq = 0.0;
  65. double sumcb = 0.0;
  66. double sumqt = 0.0;
  67. double sum_abs = 0.0;
  68. double min = 0.0 / 0.0; /* init as nan */
  69. double max = 0.0 / 0.0;
  70. double mean, mean_abs, pop_variance, sample_variance, pop_stdev,
  71. sample_stdev, pop_coeff_variation, kurtosis, skewness;
  72. double total_size = 0.0; /* total size: length/area */
  73. /* Extended statistics */
  74. int perc;
  75. int main(int argc, char *argv[])
  76. {
  77. struct GModule *module;
  78. struct Option *map_opt, *type_opt,
  79. *percentile;
  80. module = G_define_module();
  81. G_add_keyword(_("vector"));
  82. G_add_keyword(_("statistics"));
  83. G_add_keyword(_("univariate statistics"));
  84. G_add_keyword(_("attribute table"));
  85. G_add_keyword(_("geometry"));
  86. module->label =
  87. _("Calculates univariate statistics for attribute.");
  88. module->description = _("Variance and standard "
  89. "deviation is calculated only for points if specified.");
  90. map_opt = G_define_standard_option(G_OPT_V_MAP);
  91. field_opt = G_define_standard_option(G_OPT_V_FIELD);
  92. type_opt = G_define_standard_option(G_OPT_V_TYPE);
  93. type_opt->options = "point,line,boundary,centroid,area";
  94. type_opt->answer = "point,line,area";
  95. col_opt = G_define_standard_option(G_OPT_DB_COLUMN);
  96. col_opt->required = NO;
  97. where_opt = G_define_standard_option(G_OPT_DB_WHERE);
  98. percentile = G_define_option();
  99. percentile->key = "percentile";
  100. percentile->type = TYPE_INTEGER;
  101. percentile->required = NO;
  102. percentile->options = "0-100";
  103. percentile->answer = "90";
  104. percentile->description =
  105. _("Percentile to calculate (requires extended statistics flag)");
  106. shell_flag = G_define_flag();
  107. shell_flag->key = 'g';
  108. shell_flag->description = _("Print the stats in shell script style");
  109. ext_flag = G_define_flag();
  110. ext_flag->key = 'e';
  111. ext_flag->description = _("Calculate extended statistics");
  112. weight_flag = G_define_flag();
  113. weight_flag->key = 'w';
  114. weight_flag->description = _("Weigh by line length or area size");
  115. geometry = G_define_flag();
  116. geometry->key = 'd';
  117. geometry->description = _("Calculate geometric distances instead of attribute statistics");
  118. G_gisinit(argv[0]);
  119. if (G_parser(argc, argv))
  120. exit(EXIT_FAILURE);
  121. /* Only require col_opt answer if -g flag is not set */
  122. if (!col_opt->answer && !geometry->answer) {
  123. G_fatal_error(_("Required parameter <%s> not set:\n\t(%s)"), col_opt->key, col_opt->description);
  124. }
  125. otype = Vect_option_to_types(type_opt);
  126. perc = atoi(percentile->answer);
  127. Cats = Vect_new_cats_struct();
  128. /* open input vector */
  129. Vect_set_open_level(2);
  130. if (Vect_open_old2(&Map, map_opt->answer, "", field_opt->answer) < 0)
  131. G_fatal_error(_("Unable to open vector map <%s>"), map_opt->answer);
  132. ofield = Vect_get_field_number(&Map, field_opt->answer);
  133. if ((otype & GV_POINT) && Vect_get_num_primitives(&Map, GV_POINT) == 0) {
  134. otype &= ~(GV_POINT);
  135. if (otype & GV_POINT)
  136. G_fatal_error("Bye");
  137. }
  138. if ((otype & GV_CENTROID) && Vect_get_num_primitives(&Map, GV_CENTROID) == 0) {
  139. otype &= ~(GV_CENTROID);
  140. }
  141. if ((otype & GV_LINE) && Vect_get_num_primitives(&Map, GV_LINE) == 0) {
  142. otype &= ~(GV_LINE);
  143. }
  144. if ((otype & GV_BOUNDARY) && Vect_get_num_primitives(&Map, GV_BOUNDARY) == 0) {
  145. otype &= ~(GV_BOUNDARY);
  146. }
  147. if ((otype & GV_AREA) && Vect_get_num_areas(&Map) == 0) {
  148. otype &= ~(GV_AREA);
  149. }
  150. /* Check if types are compatible */
  151. if ((otype & GV_POINTS) && ((otype & GV_LINES) || (otype & GV_AREA)))
  152. compatible = 0;
  153. if ((otype & GV_LINES) && (otype & GV_AREA))
  154. compatible = 0;
  155. if (!compatible && geometry->answer)
  156. compatible = 1; /* distances is compatible with GV_POINTS and GV_LINES */
  157. if (!compatible && !weight_flag->answer)
  158. compatible = 1; /* attributes are always compatible without weight */
  159. if (geometry->answer && (otype & GV_AREA))
  160. G_fatal_error(_("Geometry distances are not supported for areas. Use '%s' instead."), "v.distance");
  161. if (!compatible) {
  162. G_warning(_("Incompatible vector type(s) specified, only number of features, minimum, maximum and range "
  163. "can be calculated"));
  164. }
  165. if (ext_flag->answer && (!(otype & GV_POINTS) || geometry->answer)) {
  166. G_warning(_("Extended statistics is currently supported only for points/centroids"));
  167. }
  168. if (geometry->answer)
  169. select_from_geometry();
  170. else
  171. select_from_database();
  172. summary();
  173. Vect_close(&Map);
  174. exit(EXIT_SUCCESS);
  175. }
  176. void select_from_geometry(void)
  177. {
  178. int i, j, k, ncats, *cats;
  179. int type;
  180. struct line_pnts *iPoints, *jPoints;
  181. iPoints = Vect_new_line_struct();
  182. jPoints = Vect_new_line_struct();
  183. if (where_opt->answer != NULL) {
  184. if (ofield < 1) {
  185. G_fatal_error(_("'layer' must be > 0 for 'where'."));
  186. }
  187. Fi = Vect_get_field(&Map, ofield);
  188. if (!Fi) {
  189. G_fatal_error(_("Database connection not defined for layer %d"),
  190. ofield);
  191. }
  192. Driver = db_start_driver_open_database(Fi->driver, Fi->database);
  193. if (Driver == NULL)
  194. G_fatal_error("Unable to open database <%s> by driver <%s>",
  195. Fi->database, Fi->driver);
  196. db_set_error_handler_driver(Driver);
  197. ncats = db_select_int(Driver, Fi->table, Fi->key, where_opt->answer,
  198. &cats);
  199. if (ncats == -1)
  200. G_fatal_error(_("Unable select categories from table <%s>"), Fi->table);
  201. db_close_database_shutdown_driver(Driver);
  202. }
  203. else
  204. ncats = 0;
  205. count = 0;
  206. nlines = Vect_get_num_lines(&Map);
  207. G_message(_("Calculating geometric distances between %d primitives..."), nlines);
  208. /* Start calculating the statistics based on distance to all other primitives.
  209. Use the centroid of areas and the first point of lines */
  210. for (i = 1; i <= nlines; i++) {
  211. G_percent(i, nlines, 2);
  212. type = Vect_read_line(&Map, iPoints, Cats, i);
  213. if (!(type & otype))
  214. continue;
  215. if (where_opt->answer) {
  216. int ok = FALSE;
  217. for (j = 0; j < Cats->n_cats; j++) {
  218. if (Vect_cat_in_array(Cats->cat[j], cats, ncats)) {
  219. ok = TRUE;
  220. break;
  221. }
  222. }
  223. if (!ok)
  224. continue;
  225. }
  226. for (j = i + 1; j < nlines; j++) {
  227. /* get distance to this object */
  228. double val = 0.0;
  229. type = Vect_read_line(&Map, jPoints, Cats, j);
  230. if (!(type & otype))
  231. continue;
  232. /* now calculate the min distance between each point in line i with line j */
  233. for (k = 0; k < iPoints->n_points; k++) {
  234. double dmin = 0.0;
  235. Vect_line_distance(jPoints, iPoints->x[k], iPoints->y[k], iPoints->z[k], 1,
  236. NULL, NULL, NULL, &dmin, NULL, NULL);
  237. if((k == 0) || (dmin < val)) {
  238. val = dmin;
  239. }
  240. }
  241. if (val > 0 && iPoints->n_points > 1) {
  242. for (k = 0; k < jPoints->n_points; k++) {
  243. double dmin = 0.0;
  244. Vect_line_distance(iPoints, jPoints->x[k], jPoints->y[k], jPoints->z[k], 1,
  245. NULL, NULL, NULL, &dmin, NULL, NULL);
  246. if(dmin < val) {
  247. val = dmin;
  248. }
  249. }
  250. }
  251. if (val > 0 && iPoints->n_points > 1 && jPoints->n_points > 1) {
  252. if (Vect_line_check_intersection(iPoints, jPoints, Vect_is_3d(&Map)))
  253. val = 0;
  254. }
  255. if (val == 0) {
  256. nzero++;
  257. continue;
  258. }
  259. if (first) {
  260. max = val;
  261. min = val;
  262. first = 0;
  263. }
  264. else {
  265. if (val > max)
  266. max = val;
  267. if (val < min)
  268. min = val;
  269. }
  270. sum += val;
  271. sumsq += val * val;
  272. sumcb += val * val * val;
  273. sumqt += val * val * val * val;
  274. sum_abs += fabs(val);
  275. count++;
  276. G_debug(3, "i=%d j=%d sum = %f val=%f", i, j, sum, val);
  277. }
  278. }
  279. }
  280. void select_from_database(void)
  281. {
  282. int nrec, ctype, nlines, line, nareas, area;
  283. struct line_pnts *Points;
  284. Fi = Vect_get_field(&Map, ofield);
  285. if (Fi == NULL) {
  286. G_fatal_error(_(" Database connection not defined for layer <%s>"), field_opt->answer);
  287. }
  288. Driver = db_start_driver_open_database(Fi->driver, Fi->database);
  289. if (Driver == NULL)
  290. G_fatal_error("Unable to open database <%s> by driver <%s>",
  291. Fi->database, Fi->driver);
  292. db_set_error_handler_driver(Driver);
  293. /* Note do not check if the column exists in the table because it may be an expression */
  294. db_CatValArray_init(&Cvarr);
  295. nrec =
  296. db_select_CatValArray(Driver, Fi->table, Fi->key, col_opt->answer,
  297. where_opt->answer, &Cvarr);
  298. G_debug(2, "db_select_CatValArray() nrec = %d", nrec);
  299. ctype = Cvarr.ctype;
  300. if (ctype != DB_C_TYPE_INT && ctype != DB_C_TYPE_DOUBLE)
  301. G_fatal_error(_("Column type not supported"));
  302. if (nrec < 0)
  303. G_fatal_error(_("Unable to select data from table"));
  304. db_close_database_shutdown_driver(Driver);
  305. Points = Vect_new_line_struct();
  306. /* Lines */
  307. nlines = 0;
  308. if ((otype & GV_POINTS) || (otype & GV_LINES))
  309. nlines = Vect_get_num_lines(&Map);
  310. G_debug(1, "select_from_database: %d points", nlines);
  311. for (line = 1; line <= nlines; line++) {
  312. int i, type;
  313. G_debug(3, "line = %d", line);
  314. G_percent(line, nlines, 2);
  315. type = Vect_read_line(&Map, Points, Cats, line);
  316. if (!(type & otype))
  317. continue;
  318. for (i = 0; i < Cats->n_cats; i++) {
  319. if (Cats->field[i] == ofield) {
  320. double val = 0.0;
  321. dbCatVal *catval;
  322. G_debug(3, "cat = %d", Cats->cat[i]);
  323. if (db_CatValArray_get_value(&Cvarr, Cats->cat[i], &catval) !=
  324. DB_OK) {
  325. G_debug(3, "No record for cat = %d", Cats->cat[i]);
  326. nmissing++;
  327. continue;
  328. }
  329. if (catval->isNull) {
  330. G_debug(3, "NULL value for cat = %d", Cats->cat[i]);
  331. nnull++;
  332. continue;
  333. }
  334. if (ctype == DB_C_TYPE_INT) {
  335. val = catval->val.i;
  336. }
  337. else if (ctype == DB_C_TYPE_DOUBLE) {
  338. val = catval->val.d;
  339. }
  340. count++;
  341. if (first) {
  342. max = val;
  343. min = val;
  344. first = 0;
  345. }
  346. else {
  347. if (val > max)
  348. max = val;
  349. if (val < min)
  350. min = val;
  351. }
  352. if (compatible) {
  353. if (type & GV_POINTS) {
  354. sum += val;
  355. sumsq += val * val;
  356. sumcb += val * val * val;
  357. sumqt += val * val * val * val;
  358. sum_abs += fabs(val);
  359. }
  360. else if (type & GV_LINES) { /* GV_LINES */
  361. double l = 1.;
  362. if (weight_flag->answer)
  363. l = Vect_line_length(Points);
  364. sum += l * val;
  365. sumsq += l * val * val;
  366. sumcb += l * val * val * val;
  367. sumqt += l * val * val * val * val;
  368. sum_abs += l * fabs(val);
  369. total_size += l;
  370. }
  371. }
  372. G_debug(3, "sum = %f total_size = %f", sum, total_size);
  373. }
  374. }
  375. }
  376. if (otype & GV_AREA) {
  377. nareas = Vect_get_num_areas(&Map);
  378. for (area = 1; area <= nareas; area++) {
  379. int i, centr;
  380. G_debug(3, "area = %d", area);
  381. centr = Vect_get_area_centroid(&Map, area);
  382. if (centr < 1)
  383. continue;
  384. G_debug(3, "centr = %d", centr);
  385. Vect_read_line(&Map, NULL, Cats, centr);
  386. for (i = 0; i < Cats->n_cats; i++) {
  387. if (Cats->field[i] == ofield) {
  388. double val = 0.0;
  389. dbCatVal *catval;
  390. G_debug(3, "cat = %d", Cats->cat[i]);
  391. if (db_CatValArray_get_value
  392. (&Cvarr, Cats->cat[i], &catval) != DB_OK) {
  393. G_debug(3, "No record for cat = %d", Cats->cat[i]);
  394. nmissing++;
  395. continue;
  396. }
  397. if (catval->isNull) {
  398. G_debug(3, "NULL value for cat = %d", Cats->cat[i]);
  399. nnull++;
  400. continue;
  401. }
  402. if (ctype == DB_C_TYPE_INT) {
  403. val = catval->val.i;
  404. }
  405. else if (ctype == DB_C_TYPE_DOUBLE) {
  406. val = catval->val.d;
  407. }
  408. count++;
  409. if (first) {
  410. max = val;
  411. min = val;
  412. first = 0;
  413. }
  414. else {
  415. if (val > max)
  416. max = val;
  417. if (val < min)
  418. min = val;
  419. }
  420. if (compatible) {
  421. double a = 1.;
  422. if (weight_flag->answer)
  423. a = Vect_get_area_area(&Map, area);
  424. sum += a * val;
  425. sumsq += a * val * val;
  426. sumcb += a * val * val * val;
  427. sumqt += a * val * val * val * val;
  428. sum_abs += a * fabs(val);
  429. total_size += a;
  430. }
  431. G_debug(4, "sum = %f total_size = %f", sum, total_size);
  432. }
  433. }
  434. }
  435. }
  436. G_debug(2, "sum = %f total_size = %f", sum, total_size);
  437. }
  438. void summary(void)
  439. {
  440. if (compatible) {
  441. if (!geometry->answer && weight_flag->answer) {
  442. mean = sum / total_size;
  443. mean_abs = sum_abs / total_size;
  444. /* Roger Bivand says it is wrong see GRASS devel list 7/2004 */
  445. /*
  446. pop_variance = (sumsq - sum * sum / total_size) / total_size;
  447. pop_stdev = sqrt(pop_variance);
  448. pop_coeff_variation = pop_stdev / (sqrt(sum * sum) / count);
  449. */
  450. }
  451. else {
  452. double n = count;
  453. mean = sum / count;
  454. mean_abs = sum_abs / count;
  455. pop_variance = (sumsq - sum * sum / count) / count;
  456. pop_stdev = sqrt(pop_variance);
  457. pop_coeff_variation = pop_stdev / (sqrt(sum * sum) / count);
  458. sample_variance = (sumsq - sum * sum / count) / (count - 1);
  459. sample_stdev = sqrt(sample_variance);
  460. kurtosis =
  461. (sumqt / count - 4 * sum * sumcb / (n * n) +
  462. 6 * sum * sum * sumsq / (n * n * n) -
  463. 3 * sum * sum * sum * sum / (n * n * n * n))
  464. / (sample_stdev * sample_stdev * sample_stdev *
  465. sample_stdev) - 3;
  466. skewness =
  467. (sumcb / n - 3 * sum * sumsq / (n * n) +
  468. 2 * sum * sum * sum / (n * n * n))
  469. / (sample_stdev * sample_stdev * sample_stdev);
  470. }
  471. }
  472. G_debug(3, "otype %d:", otype);
  473. if (shell_flag->answer) {
  474. fprintf(stdout, "n=%d\n", count);
  475. if (geometry->answer) {
  476. fprintf(stdout, "nzero=%d\n", nzero);
  477. }
  478. else {
  479. fprintf(stdout, "nmissing=%d\n", nmissing);
  480. fprintf(stdout, "nnull=%d\n", nnull);
  481. }
  482. if (count > 0) {
  483. fprintf(stdout, "min=%g\n", min);
  484. fprintf(stdout, "max=%g\n", max);
  485. fprintf(stdout, "range=%g\n", max - min);
  486. fprintf(stdout, "sum=%g\n", sum);
  487. if (compatible) {
  488. fprintf(stdout, "mean=%g\n", mean);
  489. fprintf(stdout, "mean_abs=%g\n", mean_abs);
  490. if (geometry->answer || !weight_flag->answer) {
  491. fprintf(stdout, "population_stddev=%g\n", pop_stdev);
  492. fprintf(stdout, "population_variance=%g\n", pop_variance);
  493. fprintf(stdout, "population_coeff_variation=%g\n",
  494. pop_coeff_variation);
  495. fprintf(stdout, "sample_stddev=%g\n", sample_stdev);
  496. fprintf(stdout, "sample_variance=%g\n", sample_variance);
  497. fprintf(stdout, "kurtosis=%g\n", kurtosis);
  498. fprintf(stdout, "skewness=%g\n", skewness);
  499. }
  500. }
  501. }
  502. }
  503. else {
  504. if (geometry->answer) {
  505. fprintf(stdout, "number of primitives: %d\n", nlines);
  506. fprintf(stdout, "number of non zero distances: %d\n", count);
  507. fprintf(stdout, "number of zero distances: %d\n", nzero);
  508. }
  509. else {
  510. fprintf(stdout, "number of features with non NULL attribute: %d\n",
  511. count);
  512. fprintf(stdout, "number of missing attributes: %d\n", nmissing);
  513. fprintf(stdout, "number of NULL attributes: %d\n", nnull);
  514. }
  515. if (count > 0) {
  516. fprintf(stdout, "minimum: %g\n", min);
  517. fprintf(stdout, "maximum: %g\n", max);
  518. fprintf(stdout, "range: %g\n", max - min);
  519. fprintf(stdout, "sum: %g\n", sum);
  520. if (compatible) {
  521. fprintf(stdout, "mean: %g\n", mean);
  522. fprintf(stdout, "mean of absolute values: %g\n", mean_abs);
  523. if (geometry->answer || !weight_flag->answer) {
  524. fprintf(stdout, "population standard deviation: %g\n",
  525. pop_stdev);
  526. fprintf(stdout, "population variance: %g\n", pop_variance);
  527. fprintf(stdout, "population coefficient of variation: %g\n",
  528. pop_coeff_variation);
  529. fprintf(stdout, "sample standard deviation: %g\n",
  530. sample_stdev);
  531. fprintf(stdout, "sample variance: %g\n", sample_variance);
  532. fprintf(stdout, "kurtosis: %g\n", kurtosis);
  533. fprintf(stdout, "skewness: %g\n", skewness);
  534. }
  535. }
  536. }
  537. }
  538. /* TODO: mode, skewness, kurtosis */
  539. /* Not possible to calculate for point distance, since we don't collect the population */
  540. if (ext_flag->answer && compatible &&
  541. ((otype & GV_POINTS) || !weight_flag->answer) &&
  542. !geometry->answer && count > 0) {
  543. double quartile_25 = 0.0, quartile_75 = 0.0, quartile_perc = 0.0;
  544. double median = 0.0;
  545. int qpos_25, qpos_75, qpos_perc;
  546. qpos_25 = (int)(count * 0.25 - 0.5);
  547. qpos_75 = (int)(count * 0.75 - 0.5);
  548. qpos_perc = (int)(count * perc / 100. - 0.5);
  549. if (db_CatValArray_sort_by_value(&Cvarr) != DB_OK)
  550. G_fatal_error(_("Cannot sort the key/value array"));
  551. if (Cvarr.ctype == DB_C_TYPE_INT) {
  552. quartile_25 = (Cvarr.value[qpos_25]).val.i;
  553. if (count % 2) /* odd */
  554. median = (Cvarr.value[(int)(count / 2)]).val.i;
  555. else /* even */
  556. median =
  557. ((Cvarr.value[count / 2 - 1]).val.i +
  558. (Cvarr.value[count / 2]).val.i) / 2.0;
  559. quartile_75 = (Cvarr.value[qpos_75]).val.i;
  560. quartile_perc = (Cvarr.value[qpos_perc]).val.i;
  561. }
  562. else { /* must be DB_C_TYPE_DOUBLE */
  563. quartile_25 = (Cvarr.value[qpos_25]).val.d;
  564. if (count % 2) /* odd */
  565. median = (Cvarr.value[(int)(count / 2)]).val.d;
  566. else /* even */
  567. median =
  568. ((Cvarr.value[count / 2 - 1]).val.d +
  569. (Cvarr.value[count / 2]).val.d) / 2.0;
  570. quartile_75 = (Cvarr.value[qpos_75]).val.d;
  571. quartile_perc = (Cvarr.value[qpos_perc]).val.d;
  572. }
  573. if (shell_flag->answer) {
  574. fprintf(stdout, "first_quartile=%g\n", quartile_25);
  575. fprintf(stdout, "median=%g\n", median);
  576. fprintf(stdout, "third_quartile=%g\n", quartile_75);
  577. fprintf(stdout, "percentile_%d=%g\n", perc, quartile_perc);
  578. }
  579. else {
  580. fprintf(stdout, "1st quartile: %g\n", quartile_25);
  581. if (count % 2)
  582. fprintf(stdout, "median (odd number of cells): %g\n", median);
  583. else
  584. fprintf(stdout, "median (even number of cells): %g\n",
  585. median);
  586. fprintf(stdout, "3rd quartile: %g\n", quartile_75);
  587. if (perc % 10 == 1 && perc != 11)
  588. fprintf(stdout, "%dst percentile: %g\n", perc, quartile_perc);
  589. else if (perc % 10 == 2 && perc != 12)
  590. fprintf(stdout, "%dnd percentile: %g\n", perc, quartile_perc);
  591. else if (perc % 10 == 3 && perc != 13)
  592. fprintf(stdout, "%drd percentile: %g\n", perc, quartile_perc);
  593. else
  594. fprintf(stdout, "%dth percentile: %g\n", perc, quartile_perc);
  595. }
  596. }
  597. }