class.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
  2. #include <grass/arraystats.h>
  3. int class_interval(double *data, int count, int nbreaks, double *classbreaks)
  4. {
  5. double min, max;
  6. double step;
  7. int i = 0;
  8. min = data[0];
  9. max = data[count - 1];
  10. step = (max - min) / (nbreaks + 1);
  11. for (i = 0; i < nbreaks; i++)
  12. classbreaks[i] = min + (step * (i + 1));
  13. return (1);
  14. }
  15. double class_stdev(double *data, int count, int nbreaks, double *classbreaks)
  16. {
  17. struct GASTATS stats;
  18. int i;
  19. int nbclass;
  20. double scale = 1.0;
  21. basic_stats(data, count, &stats);
  22. nbclass = nbreaks + 1;
  23. if (nbclass % 2 == 1) { /* number of classes is uneven so we center middle class on mean */
  24. /* find appropriate fraction of stdev for step */
  25. i = 1;
  26. while (i) {
  27. if (((stats.mean + stats.stdev * scale / 2) +
  28. (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
  29. ((stats.mean - stats.stdev * scale / 2) -
  30. (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
  31. scale = scale / 2;
  32. else
  33. i = 0;
  34. }
  35. /* classbreaks below the mean */
  36. for (i = 0; i < nbreaks / 2; i++)
  37. classbreaks[i] =
  38. (stats.mean - stats.stdev * scale / 2) -
  39. stats.stdev * scale * (nbreaks / 2 - (i + 1));
  40. /* classbreaks above the mean */
  41. for (i = i; i < nbreaks; i++)
  42. classbreaks[i] =
  43. (stats.mean + stats.stdev * scale / 2) +
  44. stats.stdev * scale * (i - nbreaks / 2);
  45. }
  46. else { /* number of classes is even so mean is a classbreak */
  47. /* decide whether to use 1*stdev or 0.5*stdev as step */
  48. i = 1;
  49. while (i) {
  50. if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
  51. stats.max) ||
  52. ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
  53. stats.min))
  54. scale = scale / 2;
  55. else
  56. i = 0;
  57. }
  58. /* classbreaks below the mean and on the mean */
  59. for (i = 0; i <= nbreaks / 2; i++)
  60. classbreaks[i] =
  61. stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
  62. /* classbreaks above the mean */
  63. for (i = i; i < nbreaks; i++)
  64. classbreaks[i] =
  65. stats.mean + stats.stdev * scale * (i - nbreaks / 2);
  66. }
  67. return (scale);
  68. }
  69. int class_quant(double *data, int count, int nbreaks, double *classbreaks)
  70. {
  71. int i, step;
  72. step = count / (nbreaks + 1);
  73. for (i = 0; i < nbreaks; i++)
  74. classbreaks[i] = data[step * (i + 1)];
  75. return (1);
  76. }
  77. int class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
  78. {
  79. int i, j;
  80. double *lequi; /*Vector of scale factors for probabilities of the normal distribution */
  81. struct GASTATS stats;
  82. int nbclass;
  83. nbclass = *nbreaks + 1;
  84. lequi = G_malloc(*nbreaks * sizeof(double));
  85. /*The following values come from the normal distribution and
  86. * will be used as:
  87. * classbreak[i] = (lequi[i] * stdev) + mean;
  88. */
  89. if (nbclass < 3) {
  90. lequi[0] = 0;
  91. }
  92. else if (nbclass == 3) {
  93. lequi[0] = -0.43076;
  94. lequi[1] = 0.43076;
  95. }
  96. else if (nbclass == 4) {
  97. lequi[0] = -0.6745;
  98. lequi[1] = 0;
  99. lequi[2] = 0.6745;
  100. }
  101. else if (nbclass == 5) {
  102. lequi[0] = -0.8416;
  103. lequi[1] = -0.2533;
  104. lequi[2] = 0.2533;
  105. lequi[3] = 0.8416;
  106. }
  107. else if (nbclass == 6) {
  108. lequi[0] = -0.9676;
  109. lequi[1] = -0.43076;
  110. lequi[2] = 0;
  111. lequi[3] = 0.43076;
  112. lequi[4] = 0.9676;
  113. }
  114. else if (nbclass == 7) {
  115. lequi[0] = -1.068;
  116. lequi[1] = -0.566;
  117. lequi[2] = -0.18;
  118. lequi[3] = 0.18;
  119. lequi[4] = 0.566;
  120. lequi[5] = 1.068;
  121. }
  122. else if (nbclass == 8) {
  123. lequi[0] = -1.1507;
  124. lequi[1] = -0.6745;
  125. lequi[2] = -0.3187;
  126. lequi[3] = 0;
  127. lequi[4] = 0.3187;
  128. lequi[5] = 0.6745;
  129. lequi[6] = 1.1507;
  130. }
  131. else if (nbclass == 9) {
  132. lequi[0] = -1.2208;
  133. lequi[1] = -0.7648;
  134. lequi[2] = -0.4385;
  135. lequi[3] = -0.1397;
  136. lequi[4] = 0.1397;
  137. lequi[5] = 0.4385;
  138. lequi[6] = 0.7648;
  139. lequi[7] = 1.2208;
  140. }
  141. else if (nbclass == 10) {
  142. lequi[0] = -1.28155;
  143. lequi[1] = -0.84162;
  144. lequi[2] = -0.5244;
  145. lequi[3] = -0.25335;
  146. lequi[4] = 0;
  147. lequi[5] = 0.25335;
  148. lequi[6] = 0.5244;
  149. lequi[7] = 0.84162;
  150. lequi[8] = 1.28155;
  151. }
  152. else {
  153. G_fatal_error
  154. ("Equiprobable classbreaks currently limited to 10 classes");
  155. }
  156. basic_stats(data, count, &stats);
  157. /*check if any of the classbreaks would fall outside of the range min-max */
  158. j = 0;
  159. for (i = 0; i < *nbreaks; i++) {
  160. if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
  161. (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
  162. j++;
  163. }
  164. }
  165. if (j < (*nbreaks)) {
  166. G_warning(_
  167. ("There are classbreaks outside the range min-max. Number of classes reduced to %i, but using probabilities for %i classes."),
  168. j + 1, *nbreaks + 1);
  169. G_realloc(classbreaks, j * sizeof(double));
  170. for (i = 0; i < j; i++)
  171. classbreaks[i] = 0;
  172. }
  173. j = 0;
  174. for (i = 0; i < *nbreaks; i++) {
  175. if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
  176. (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
  177. classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
  178. j++;
  179. }
  180. }
  181. *nbreaks = j;
  182. G_free(lequi);
  183. return (1);
  184. }
  185. double class_discont(double *data, int count, int nbreaks, double *classbreaks)
  186. {
  187. int *num, nbclass, maxclass = 0;
  188. double *no, *zz, *nz, *xn, *co;
  189. double *x; //Vecteur des observations standardisées
  190. int i, j, k;
  191. double min = 0, max = 0, rangemax = 0;
  192. int n = 0;
  193. double rangemin = 0, xlim = 0;
  194. double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
  195. int nf = 0, nmax = 0;
  196. double *abc;
  197. int nd = 0;
  198. double den = 0, d = 0;
  199. int im = 0, ji = 0;
  200. int tmp = 0;
  201. int nff = 0, jj = 0, no1 = 0, no2 = 0;
  202. double f = 0, xt1 = 0, xt2 = 0, chi2 = 0, xnj_1 = 0, xj_1 = 0;
  203. /*get the number of values */
  204. n = count;
  205. nbclass = nbreaks + 1;
  206. num = G_malloc((nbclass + 1) * sizeof(int));
  207. no = G_malloc((nbclass + 1) * sizeof(double));
  208. zz = G_malloc((nbclass + 1) * sizeof(double));
  209. nz = G_malloc(3 * sizeof(double));
  210. xn = G_malloc((n + 1) * sizeof(double));
  211. co = G_malloc((nbclass + 1) * sizeof(double));
  212. /* We copy the array of values to x, in order to be able to standardize it */
  213. x = G_malloc((n + 1) * sizeof(double));
  214. x[0] = n;
  215. xn[0] = 0;
  216. min = data[0];
  217. max = data[count - 1];
  218. for (i = 1; i <= n; i++)
  219. x[i] = data[i - 1];
  220. rangemax = max - min;
  221. rangemin = rangemax;
  222. for (i = 2; i <= n; i++) {
  223. if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
  224. rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
  225. }
  226. /* STANDARDIZATION
  227. * and creation of the number vector (xn) */
  228. for (i = 1; i <= n; i++) {
  229. x[i] = (x[i] - min) / rangemax;
  230. xn[i] = i / (double)n;
  231. }
  232. xlim = rangemin / rangemax;
  233. rangemin = rangemin / 2.0;
  234. /* Searching for the limits */
  235. num[1] = n;
  236. abc = G_malloc(3 * sizeof(double));
  237. /* Loop through possible solutions */
  238. for (i = 1; i <= nbclass; i++) {
  239. nmax = 0;
  240. dmax = 0.0;
  241. d2 = 0.0;
  242. nf = 0; /*End number */
  243. /* Loop through classes */
  244. for (j = 1; j <= i; j++) {
  245. nd = nf; /*Start number */
  246. nf = num[j];
  247. co[j] = 10e37;
  248. eqdrt(x, xn, nd, nf, abc);
  249. den = sqrt(pow(abc[1], 2) + 1.0);
  250. nd++;
  251. /* Loop through observations */
  252. for (k = nd; k <= nf; k++) {
  253. if (abc[2] == 0.0)
  254. d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
  255. else
  256. d = fabs(x[k] - abc[2]);
  257. d2 += pow(d, 2);
  258. if (x[k] - x[nd] < xlim)
  259. continue;
  260. if (x[nf] - x[k] < xlim)
  261. continue;
  262. if (d <= dmax)
  263. continue;
  264. dmax = d;
  265. nmax = k;
  266. }
  267. nd--; //A VERIFIER!
  268. if (x[nf] != x[nd]) {
  269. if (nd != 0)
  270. co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
  271. else
  272. co[j] = (xn[nf]) / (x[nf]); //A VERIFIER!
  273. }
  274. }
  275. if (i == 1)
  276. dd = d2;
  277. p = d2 / dd;
  278. for (j = 1; j <= i; j++) {
  279. no[j] = num[j];
  280. zz[j] = x[num[j]] * rangemax + min;
  281. if (j == i)
  282. continue;
  283. if (co[j] > co[j + 1]) {
  284. zz[j] = zz[j] + rangemin;
  285. continue;
  286. }
  287. zz[j] = zz[j] - rangemin;
  288. no[j] = no[j] - 1;
  289. }
  290. im = i - 1;
  291. if (im != 0.0) {
  292. for (j = 1; j <= im; j++) {
  293. ji = i + 1 - j;
  294. no[ji] -= no[ji - 1];
  295. }
  296. }
  297. if (nmax == 0) {
  298. break;
  299. }
  300. nff = i + 2;
  301. tmp = 0;
  302. for (j = 1; j <= i; j++) {
  303. jj = nff - j;
  304. if (num[jj - 1] < nmax) {
  305. num[jj] = nmax;
  306. tmp = 1;
  307. break;
  308. }
  309. num[jj] = num[jj - 1];
  310. }
  311. if (tmp == 0) {
  312. num[1] = nmax;
  313. jj = 1;
  314. }
  315. if (jj == 1) {
  316. xnj_1 = 0;
  317. xj_1 = 0;
  318. }
  319. else {
  320. xnj_1 = xn[num[jj - 1]];
  321. xj_1 = x[num[jj - 1]];
  322. }
  323. no1 = (xn[num[jj]] - xnj_1) * n;
  324. no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
  325. f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
  326. f *= n;
  327. xt1 = (x[num[jj]] - xj_1) * f;
  328. xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
  329. if (xt2 == 0) {
  330. xt2 = rangemin / 2.0 / rangemax * f;
  331. xt1 -= xt2;
  332. }
  333. else if (xt1 * xt2 == 0) {
  334. xt1 = rangemin / 2.0 / rangemax * f;
  335. xt2 -= xt1;
  336. }
  337. /*if new class break not statistically significant (alpha > 0.05), give warning */
  338. if (maxclass == 0) {
  339. chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
  340. if (chi2 < 3.84148) {
  341. G_warning(_
  342. ("discontinuities algorithm: %i class breaks or more are not statistically significant at alpha=0.05"),
  343. i);
  344. maxclass = 1;
  345. }
  346. }
  347. }
  348. /* Fill up classbreaks of i <=nbclass classes */
  349. for (j = 0; j <= (i - 1); j++)
  350. classbreaks[j] = zz[j + 1];
  351. return (chi2);
  352. }
  353. int class_frequencies(double *data, int count, int nbreaks,
  354. double *classbreaks, int *frequencies)
  355. {
  356. int i, j;
  357. double min, max;
  358. min = data[0];
  359. max = data[count - 1];
  360. /* count cases in all classes, except for last class */
  361. i = 0;
  362. for (j = 0; j < nbreaks; j++) {
  363. while (data[i] <= classbreaks[j]) {
  364. frequencies[j]++;
  365. i++;
  366. }
  367. }
  368. /*Now count cases in last class */
  369. for (i = i; i < count; i++) {
  370. frequencies[nbreaks]++;
  371. }
  372. return (1);
  373. }