cluster.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /****************************************************************************
  2. *
  3. * MODULE: r.clump
  4. *
  5. * AUTHOR(S): Michael Shapiro - CERL
  6. * Markus Metz
  7. *
  8. * PURPOSE: Recategorizes data in a raster map layer by grouping cells
  9. * that form physically discrete areas into unique categories.
  10. *
  11. * COPYRIGHT: (C) 2006-2016 by the GRASS Development Team
  12. *
  13. * This program is free software under the GNU General Public
  14. * License (>=v2). Read the file COPYING that comes with GRASS
  15. * for details.
  16. *
  17. ***************************************************************************/
  18. #include <sys/types.h>
  19. #include <sys/stat.h>
  20. #include <fcntl.h>
  21. #include <unistd.h>
  22. #include <math.h>
  23. #include <time.h>
  24. #include <grass/gis.h>
  25. #include <grass/raster.h>
  26. #include <grass/glocale.h>
  27. #include "iseg.h"
  28. #define INCR 1024
  29. /* defeats the purpose of padding ... */
  30. #define CISNULL(r, c) (((c) == 0 || (c) == ncols + 1 ? 1 : \
  31. (FLAG_GET(globals->null_flag, (r), (c - 1)))))
  32. CELL cluster_bands(struct globals *globals)
  33. {
  34. register int col;
  35. register int i, n;
  36. /* input */
  37. DCELL **prev_in, **cur_in, **temp_in;
  38. struct ngbr_stats Ri, Rk, Rn;
  39. int nin;
  40. int diag;
  41. int bcol;
  42. /* output */
  43. CELL OLD, NEW;
  44. CELL *temp_clump, out_cell;
  45. CELL *prev_clump, *cur_clump;
  46. CELL *index, *renumber;
  47. CELL label, cellmax;
  48. int nrows, ncols;
  49. int row;
  50. int len;
  51. int nalloc;
  52. char *cname;
  53. int cfd, csize;
  54. CELL cat;
  55. int mwrow, mwrow1, mwrow2, mwnrows, mwcol, mwcol1, mwcol2, mwncols, radiusc;
  56. double diff, diff2, avgdiff, ka2, hspat, hspat2;
  57. double hspec, hspecad, hspec2, hspec2ad;
  58. LARGEINT count;
  59. G_message(_("%d-band clustering with threshold %g"), globals->nbands, globals->hr);
  60. nrows = Rast_window_rows();
  61. ncols = Rast_window_cols();
  62. hspec = globals->hr;
  63. hspec2 = globals->hr * globals->hr;
  64. nin = globals->nbands;
  65. diag = (globals->nn == 8);
  66. radiusc = globals->hs;
  67. mwnrows = mwncols = radiusc * 2 + 1;
  68. /* spatial bandwidth */
  69. hspat = globals->hs;
  70. if (hspat < 1)
  71. hspat = 1.5;
  72. hspat2 = hspat * hspat;
  73. cellmax = ((CELL)1 << (sizeof(CELL) * 8 - 2)) - 1;
  74. cellmax += ((CELL)1 << (sizeof(CELL) * 8 - 2));
  75. Ri.mean = NULL;
  76. Rk.mean = NULL;
  77. Rn.mean = G_malloc(globals->datasize);
  78. /* allocate clump index */
  79. /* TODO: support smallest label ID > 1 */
  80. nalloc = INCR;
  81. index = (CELL *) G_malloc(nalloc * sizeof(CELL));
  82. index[0] = 0;
  83. renumber = NULL;
  84. /* allocate DCELL buffers two columns larger than current window */
  85. prev_in = (DCELL **) G_malloc(sizeof(DCELL *) * (ncols + 2));
  86. cur_in = (DCELL **) G_malloc(sizeof(DCELL *) * (ncols + 2));
  87. prev_in[0] = (DCELL *) G_malloc(globals->datasize * (ncols + 2) * nin);
  88. cur_in[0] = (DCELL *) G_malloc(globals->datasize * (ncols + 2) * nin);
  89. Rast_set_d_null_value(cur_in[0], (ncols + 2) * nin);
  90. Rast_set_d_null_value(prev_in[0], (ncols + 2) * nin);
  91. for (i = 1; i < ncols + 2; i++) {
  92. prev_in[i] = prev_in[i - 1] + nin;
  93. cur_in[i] = cur_in[i - 1] + nin;
  94. }
  95. /* allocate CELL buffers two columns larger than current window */
  96. len = (ncols + 2) * sizeof(CELL);
  97. prev_clump = (CELL *) G_malloc(len);
  98. cur_clump = (CELL *) G_malloc(len);
  99. /* temp file for initial clump IDs */
  100. cname = G_tempfile();
  101. if ((cfd = open(cname, O_RDWR | O_CREAT | O_EXCL, 0600)) < 0)
  102. G_fatal_error(_("Unable to open temp file"));
  103. csize = ncols * sizeof(CELL);
  104. /* initialize clump labels */
  105. G_zero(cur_clump, len);
  106. G_zero(prev_clump, len);
  107. /* TODO: support smallest label ID > 1 */
  108. label = 0;
  109. /****************************************************
  110. * PASS 1 *
  111. * pass thru the input, create initial clump labels *
  112. ****************************************************/
  113. G_message(_("Assigning initial region IDs..."));
  114. for (row = 0; row < nrows; row++) {
  115. G_percent(row, nrows, 2);
  116. for (col = 1; col <= ncols; col++) {
  117. /* get band values */
  118. Segment_get(globals->bands_out, (void *)cur_in[col], row, col - 1);
  119. Ri.mean = cur_in[col];
  120. if (CISNULL(row, col)) {
  121. cur_clump[col] = 0;
  122. continue;
  123. }
  124. hspec2ad = hspec2;
  125. if (globals->ms_adaptive) {
  126. /* adapt initial range bandwidth */
  127. mwrow1 = row - radiusc;
  128. mwrow2 = mwrow1 + mwnrows;
  129. if (mwrow1 < 0)
  130. mwrow1 = 0;
  131. if (mwrow2 > nrows)
  132. mwrow2 = nrows;
  133. mwcol1 = col - radiusc;
  134. mwcol2 = mwcol1 + mwncols;
  135. if (mwcol1 < 0)
  136. mwcol1 = 0;
  137. if (mwcol2 > ncols)
  138. mwcol2 = ncols;
  139. ka2 = hspec2; /* OTB: conductance parameter */
  140. avgdiff = 0;
  141. count = 0;
  142. for (mwrow = mwrow1; mwrow < mwrow2; mwrow++) {
  143. for (mwcol = mwcol1; mwcol < mwcol2; mwcol++) {
  144. if ((FLAG_GET(globals->null_flag, mwrow, mwcol)))
  145. continue;
  146. if (mwrow == row && mwcol == col)
  147. continue;
  148. diff = mwrow - row;
  149. diff2 = diff * diff;
  150. diff = mwcol - col;
  151. diff2 += diff * diff;
  152. if (diff2 <= hspat2) {
  153. Segment_get(globals->bands_out, (void *)Rn.mean,
  154. mwrow, mwcol);
  155. /* get spectral distance */
  156. diff2 = (globals->calculate_similarity)(&Ri, &Rn, globals);
  157. avgdiff += sqrt(diff2);
  158. count++;
  159. }
  160. }
  161. }
  162. hspec2ad = 0;
  163. if (avgdiff > 0) {
  164. avgdiff /= count;
  165. hspecad = hspec;
  166. /* OTB-like, contrast enhancing */
  167. hspecad = exp(-avgdiff * avgdiff / (2 * ka2)) * avgdiff;
  168. /* preference for large regions, from Perona Malik 1990
  169. * if the settings are right, it could be used to reduce noise */
  170. /* hspecad = 1 / (1 + (avgdiff * avgdiff / (2 * hspec2))); */
  171. hspec2ad = hspecad * hspecad;
  172. G_debug(1, "avg spectral diff: %g", avgdiff);
  173. G_debug(1, "initial hspec2: %g", hspec2);
  174. G_debug(1, "adapted hspec2: %g", hspec2ad);
  175. }
  176. }
  177. /*
  178. * if the cell values are different to the left and above
  179. * (diagonal: and above left and above right)
  180. * then we must start a new clump
  181. *
  182. * this new clump may eventually collide with another
  183. * clump and will have to be merged
  184. */
  185. /* try to connect the current cell to an existing clump */
  186. OLD = NEW = 0;
  187. Rk.mean = cur_in[col - 1];
  188. /* same clump as to the left */
  189. if (!CISNULL(row, col - 1) &&
  190. globals->calculate_similarity(&Ri, &Rk, globals) <= hspec2ad) {
  191. OLD = cur_clump[col] = cur_clump[col - 1];
  192. }
  193. if (diag) {
  194. /* check above right, center, left, in that order */
  195. temp_clump = prev_clump + col + 1;
  196. bcol = col + 1;
  197. do {
  198. Rk.mean = prev_in[bcol];
  199. if (row > 0 && !CISNULL(row - 1, bcol) &&
  200. globals->calculate_similarity(&Ri, &Rk, globals) <= hspec2ad) {
  201. cur_clump[col] = *temp_clump;
  202. if (OLD == 0) {
  203. OLD = *temp_clump;
  204. }
  205. else {
  206. NEW = *temp_clump;
  207. /* threshold > 0 and diagonal requires a bit of extra work
  208. * because of bridge cells:
  209. * A similar to B, B similar to C, but A not similar to C
  210. * -> B is bridge cell */
  211. if (NEW != OLD) {
  212. CELL *temp_clump2;
  213. /* conflict! preserve NEW clump ID and change OLD clump ID.
  214. * Must go back to the left in the current row and to the right
  215. * in the previous row to change all the clump values as well.
  216. */
  217. /* left of the current row from 1 to col - 1 */
  218. temp_clump2 = cur_clump;
  219. n = col - 1;
  220. while (n-- > 0) {
  221. temp_clump2++; /* skip left edge */
  222. if (*temp_clump2 == OLD)
  223. *temp_clump2 = NEW;
  224. }
  225. /* right of previous row from col - 1 to ncols */
  226. temp_clump2 = prev_clump + col - 1;
  227. n = ncols - col + 2;
  228. while (n-- > 0) {
  229. if (*temp_clump2 == OLD)
  230. *temp_clump2 = NEW;
  231. temp_clump2++;
  232. }
  233. /* modify the OLD index */
  234. index[OLD] = NEW;
  235. OLD = NEW;
  236. NEW = 0;
  237. }
  238. }
  239. }
  240. temp_clump--;
  241. } while (bcol-- > col - 1);
  242. }
  243. else {
  244. /* check above */
  245. Rk.mean = prev_in[col];
  246. if (row > 0 && !CISNULL(row - 1, col) &&
  247. globals->calculate_similarity(&Ri, &Rk, globals) <= hspec2ad) {
  248. temp_clump = prev_clump + col;
  249. cur_clump[col] = *temp_clump;
  250. if (OLD == 0) {
  251. OLD = *temp_clump;
  252. }
  253. else {
  254. NEW = *temp_clump;
  255. if (NEW != OLD) {
  256. /* conflict! preserve NEW clump ID and change OLD clump ID.
  257. * Must go back to the left in the current row and to the right
  258. * in the previous row to change all the clump values as well.
  259. */
  260. /* left of the current row from 1 to col - 1 */
  261. temp_clump = cur_clump;
  262. n = col - 1;
  263. while (n-- > 0) {
  264. temp_clump++; /* skip left edge */
  265. if (*temp_clump == OLD)
  266. *temp_clump = NEW;
  267. }
  268. /* right of previous row from col + 1 to ncols */
  269. temp_clump = prev_clump + col;
  270. n = ncols - col;
  271. while (n-- > 0) {
  272. temp_clump++; /* skip col */
  273. if (*temp_clump == OLD)
  274. *temp_clump = NEW;
  275. }
  276. /* modify the OLD index */
  277. index[OLD] = NEW;
  278. OLD = NEW;
  279. NEW = 0;
  280. }
  281. }
  282. }
  283. }
  284. if (NEW == 0 || OLD == NEW) { /* ok */
  285. if (OLD == 0) {
  286. /* start a new clump */
  287. if (label == cellmax)
  288. G_fatal_error(_("Too many objects: integer overflow"));
  289. label++;
  290. cur_clump[col] = label;
  291. if (label >= nalloc) {
  292. nalloc += INCR;
  293. index =
  294. (CELL *) G_realloc(index,
  295. nalloc * sizeof(CELL));
  296. }
  297. index[label] = label;
  298. }
  299. }
  300. /* else the relabelling above failed */
  301. }
  302. /* write initial clump IDs */
  303. /* this works also with writing out cur_clump, but only
  304. * prev_clump is complete and will not change any more */
  305. if (row > 0) {
  306. if (write(cfd, prev_clump + 1, csize) != csize)
  307. G_fatal_error(_("Unable to write to temp file"));
  308. }
  309. /* switch the buffers so that the current buffer becomes the previous */
  310. temp_in = cur_in;
  311. cur_in = prev_in;
  312. prev_in = temp_in;
  313. temp_clump = cur_clump;
  314. cur_clump = prev_clump;
  315. prev_clump = temp_clump;
  316. }
  317. /* write last row with initial clump IDs */
  318. if (write(cfd, prev_clump + 1, csize) != csize)
  319. G_fatal_error(_("Unable to write to temp file"));
  320. G_percent(1, 1, 1);
  321. G_free(prev_in[0]);
  322. G_free(cur_in[0]);
  323. G_free(prev_in);
  324. G_free(cur_in);
  325. /* generate a renumbering scheme */
  326. G_message(_("Generating renumbering scheme..."));
  327. G_debug(1, "%d initial labels", label);
  328. /* allocate final clump ID */
  329. renumber = (CELL *) G_malloc((label + 1) * sizeof(CELL));
  330. renumber[0] = 0;
  331. cat = 0;
  332. G_percent(0, label, 1);
  333. for (n = 1; n <= label; n++) {
  334. G_percent(n, label, 1);
  335. OLD = n;
  336. NEW = index[n];
  337. if (OLD != NEW) {
  338. renumber[n] = 0;
  339. /* find valid clump ID */
  340. while (OLD != NEW) {
  341. OLD = NEW;
  342. NEW = index[OLD];
  343. }
  344. index[n] = NEW;
  345. }
  346. else
  347. /* set final clump id */
  348. renumber[n] = ++cat;
  349. }
  350. if (cat > cellmax - globals->max_rid)
  351. G_fatal_error(_("Too many objects: integer overflow"));
  352. /* rewind temp file */
  353. lseek(cfd, 0, SEEK_SET);
  354. /****************************************************
  355. * PASS 2 *
  356. * apply renumbering scheme to initial clump labels *
  357. ****************************************************/
  358. /* the input raster is no longer needed,
  359. * using instead the temp file with initial clump labels */
  360. G_message(_("Assigning final region IDs..."));
  361. for (row = 0; row < nrows; row++) {
  362. G_percent(row, nrows, 2);
  363. if (read(cfd, cur_clump, csize) != csize)
  364. G_fatal_error(_("Unable to read from temp file"));
  365. temp_clump = cur_clump;
  366. for (col = 0; col < ncols; col++) {
  367. if (!(FLAG_GET(globals->null_flag, row, col))) {
  368. out_cell = renumber[index[*temp_clump]] + globals->max_rid;
  369. Segment_put(&globals->rid_seg, (void *)&out_cell, row, col);
  370. }
  371. temp_clump++;
  372. }
  373. }
  374. G_percent(1, 1, 1);
  375. close(cfd);
  376. unlink(cname);
  377. /* free */
  378. G_free(prev_clump);
  379. G_free(cur_clump);
  380. G_free(index);
  381. G_free(renumber);
  382. G_message(_("Found %d clumps"), cat);
  383. globals->max_rid += cat;
  384. return globals->max_rid;
  385. }