clump.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707
  1. /****************************************************************************
  2. *
  3. * MODULE: r.clump
  4. *
  5. * AUTHOR(S): Michael Shapiro - CERL
  6. * Markus Metz
  7. *
  8. * PURPOSE: Recategorizes data in a raster map layer by grouping cells
  9. * that form physically discrete areas into unique categories.
  10. *
  11. * COPYRIGHT: (C) 2006-2016 by the GRASS Development Team
  12. *
  13. * This program is free software under the GNU General Public
  14. * License (>=v2). Read the file COPYING that comes with GRASS
  15. * for details.
  16. *
  17. ***************************************************************************/
  18. #include <sys/types.h>
  19. #include <sys/stat.h>
  20. #include <fcntl.h>
  21. #include <unistd.h>
  22. #include <time.h>
  23. #include <grass/gis.h>
  24. #include <grass/raster.h>
  25. #include <grass/glocale.h>
  26. #include "local_proto.h"
  27. #define INCR 1024
  28. static CELL do_renumber(int *in_fd, DCELL *rng, int nin,
  29. int diag, int minsize,
  30. int cfd, CELL label, CELL *index, int out_fd)
  31. {
  32. int row, col, nrows, ncols;
  33. int n;
  34. CELL OLD, NEW;
  35. CELL *temp_cell, *temp_clump;
  36. CELL *cur_clump, *out_cell;
  37. CELL *clumpid;
  38. CELL cat;
  39. int csize;
  40. nrows = Rast_window_rows();
  41. ncols = Rast_window_cols();
  42. csize = ncols * sizeof(CELL);
  43. /* generate a renumbering scheme */
  44. G_message(_("Generating renumbering scheme..."));
  45. G_debug(1, "%d initial labels", label);
  46. /* allocate final clump ID */
  47. clumpid = (CELL *) G_malloc((label + 1) * sizeof(CELL));
  48. clumpid[0] = 0;
  49. cat = 0;
  50. G_percent(0, label, 1);
  51. for (n = 1; n <= label; n++) {
  52. G_percent(n, label, 1);
  53. OLD = n;
  54. NEW = index[n];
  55. if (OLD != NEW) {
  56. clumpid[n] = 0;
  57. /* find valid clump ID */
  58. while (OLD != NEW) {
  59. OLD = NEW;
  60. NEW = index[OLD];
  61. }
  62. index[n] = NEW;
  63. }
  64. else
  65. /* set final clump id */
  66. clumpid[n] = ++cat;
  67. }
  68. /****************************************************
  69. * PASS 2 *
  70. * apply renumbering scheme to initial clump labels *
  71. ****************************************************/
  72. G_message(_("Pass 2 of 2..."));
  73. if (minsize > 1) {
  74. int do_write;
  75. off_t coffset;
  76. CELL new_clump;
  77. cur_clump = Rast_allocate_c_buf();
  78. for (row = 0; row < nrows; row++) {
  79. G_percent(row, nrows, 2);
  80. coffset = (off_t)row * csize;
  81. lseek(cfd, coffset, SEEK_SET);
  82. if (read(cfd, cur_clump, csize) != csize)
  83. G_fatal_error(_("Unable to read from temp file"));
  84. temp_clump = cur_clump;
  85. do_write = 0;
  86. for (col = 0; col < ncols; col++) {
  87. new_clump = clumpid[index[*temp_clump]];
  88. if (*temp_clump != new_clump) {
  89. *temp_clump = new_clump;
  90. do_write = 1;
  91. }
  92. temp_clump++;
  93. }
  94. if (do_write) {
  95. lseek(cfd, coffset, SEEK_SET);
  96. if (write(cfd, cur_clump, csize) != csize)
  97. G_fatal_error(_("Unable to write to temp file"));
  98. }
  99. }
  100. G_percent(1, 1, 1);
  101. G_free(cur_clump);
  102. G_free(index);
  103. G_free(clumpid);
  104. G_message(_("%d initial clumps"), cat);
  105. return merge_small_clumps(in_fd, nin, rng,
  106. diag, minsize, &cat,
  107. cfd, out_fd);
  108. }
  109. if (out_fd < 0) {
  110. fprintf(stdout, "clumps=%d\n", cat);
  111. return cat;
  112. }
  113. /* the input raster is no longer needed,
  114. * using instead the temp file with initial clump labels */
  115. /* rewind temp file */
  116. lseek(cfd, 0, SEEK_SET);
  117. cur_clump = Rast_allocate_c_buf();
  118. out_cell = Rast_allocate_c_buf();
  119. for (row = 0; row < nrows; row++) {
  120. G_percent(row, nrows, 2);
  121. if (read(cfd, cur_clump, csize) != csize)
  122. G_fatal_error(_("Unable to read from temp file"));
  123. temp_clump = cur_clump;
  124. temp_cell = out_cell;
  125. for (col = 0; col < ncols; col++) {
  126. *temp_cell = clumpid[index[*temp_clump]];
  127. if (*temp_cell == 0) {
  128. Rast_set_c_null_value(temp_cell, 1);
  129. }
  130. temp_clump++;
  131. temp_cell++;
  132. }
  133. Rast_put_row(out_fd, out_cell, CELL_TYPE);
  134. }
  135. G_percent(1, 1, 1);
  136. G_free(cur_clump);
  137. G_free(out_cell);
  138. G_free(index);
  139. G_free(clumpid);
  140. return cat;
  141. }
  142. CELL clump(int *in_fd, int out_fd, int diag, int minsize)
  143. {
  144. register int col;
  145. register int n;
  146. CELL NEW, OLD;
  147. CELL *temp_cell, *temp_clump;
  148. CELL *prev_in, *cur_in;
  149. CELL *prev_clump, *cur_clump;
  150. CELL X, LEFT;
  151. CELL *index;
  152. CELL label;
  153. int nrows, ncols;
  154. int row;
  155. int len;
  156. int nalloc;
  157. long cur_time;
  158. char *cname;
  159. int cfd, csize;
  160. nrows = Rast_window_rows();
  161. ncols = Rast_window_cols();
  162. /* allocate clump index */
  163. nalloc = INCR;
  164. index = (CELL *) G_malloc(nalloc * sizeof(CELL));
  165. index[0] = 0;
  166. /* allocate CELL buffers two columns larger than current window */
  167. len = (ncols + 2) * sizeof(CELL);
  168. prev_in = (CELL *) G_malloc(len);
  169. cur_in = (CELL *) G_malloc(len);
  170. prev_clump = (CELL *) G_malloc(len);
  171. cur_clump = (CELL *) G_malloc(len);
  172. /* temp file for initial clump IDs */
  173. cname = G_tempfile();
  174. if ((cfd = open(cname, O_RDWR | O_CREAT | O_EXCL, 0600)) < 0)
  175. G_fatal_error(_("Unable to open temp file"));
  176. csize = ncols * sizeof(CELL);
  177. time(&cur_time);
  178. /* fake a previous row which is all NULL */
  179. Rast_set_c_null_value(prev_in, ncols + 2);
  180. /* set left and right edge to NULL */
  181. Rast_set_c_null_value(&cur_in[0], 1);
  182. Rast_set_c_null_value(&cur_in[ncols + 1], 1);
  183. /* initialize clump labels */
  184. G_zero(cur_clump, len);
  185. G_zero(prev_clump, len);
  186. label = 0;
  187. /****************************************************
  188. * PASS 1 *
  189. * pass thru the input, create initial clump labels *
  190. ****************************************************/
  191. G_message(_("Pass 1 of 2..."));
  192. for (row = 0; row < nrows; row++) {
  193. Rast_get_c_row(*in_fd, cur_in + 1, row);
  194. G_percent(row, nrows, 2);
  195. Rast_set_c_null_value(&X, 1);
  196. for (col = 1; col <= ncols; col++) {
  197. LEFT = X;
  198. X = cur_in[col];
  199. if (Rast_is_c_null_value(&X)) { /* don't clump NULL data */
  200. cur_clump[col] = 0;
  201. continue;
  202. }
  203. /*
  204. * if the cell value is different to the left and above
  205. * (diagonal: and above left and above right)
  206. * then we must start a new clump
  207. *
  208. * this new clump may eventually collide with another
  209. * clump and will have to be merged
  210. */
  211. /* try to connect the current cell to an existing clump */
  212. OLD = NEW = 0;
  213. /* same clump as to the left */
  214. if (X == LEFT) {
  215. OLD = cur_clump[col] = cur_clump[col - 1];
  216. }
  217. if (diag) {
  218. /* check above right, center, left, in that order */
  219. n = 2;
  220. temp_clump = prev_clump + col + 1;
  221. temp_cell = prev_in + col + 1;
  222. do {
  223. if (X == *temp_cell) {
  224. cur_clump[col] = *temp_clump;
  225. if (OLD == 0) {
  226. OLD = *temp_clump;
  227. }
  228. else {
  229. NEW = *temp_clump;
  230. break;
  231. }
  232. }
  233. temp_cell--;
  234. temp_clump--;
  235. } while (n-- > 0);
  236. }
  237. else {
  238. /* check above */
  239. if (X == prev_in[col]) {
  240. temp_clump = prev_clump + col;
  241. cur_clump[col] = *temp_clump;
  242. if (OLD == 0) {
  243. OLD = *temp_clump;
  244. }
  245. else {
  246. NEW = *temp_clump;
  247. }
  248. }
  249. }
  250. if (NEW == 0 || OLD == NEW) { /* ok */
  251. if (OLD == 0) {
  252. /* start a new clump */
  253. label++;
  254. cur_clump[col] = label;
  255. if (label >= nalloc) {
  256. nalloc += INCR;
  257. index =
  258. (CELL *) G_realloc(index,
  259. nalloc * sizeof(CELL));
  260. }
  261. index[label] = label;
  262. }
  263. continue;
  264. }
  265. /* conflict! preserve NEW clump ID and change OLD clump ID.
  266. * Must go back to the left in the current row and to the right
  267. * in the previous row to change all the clump values as well.
  268. */
  269. /* left of the current row from 1 to col - 1 */
  270. temp_clump = cur_clump;
  271. n = col - 1;
  272. while (n-- > 0) {
  273. temp_clump++; /* skip left edge */
  274. if (*temp_clump == OLD)
  275. *temp_clump = NEW;
  276. }
  277. /* right of previous row from col + 1 to ncols */
  278. temp_clump = prev_clump + col;
  279. n = ncols - col;
  280. while (n-- > 0) {
  281. temp_clump++; /* skip col */
  282. if (*temp_clump == OLD)
  283. *temp_clump = NEW;
  284. }
  285. /* modify the OLD index */
  286. index[OLD] = NEW;
  287. }
  288. /* write initial clump IDs */
  289. /* this works also with writing out cur_clump, but only
  290. * prev_clump is complete and will not change any more */
  291. if (row > 0) {
  292. if (write(cfd, prev_clump + 1, csize) != csize)
  293. G_fatal_error(_("Unable to write to temp file"));
  294. }
  295. /* switch the buffers so that the current buffer becomes the previous */
  296. temp_cell = cur_in;
  297. cur_in = prev_in;
  298. prev_in = temp_cell;
  299. temp_clump = cur_clump;
  300. cur_clump = prev_clump;
  301. prev_clump = temp_clump;
  302. }
  303. /* write last row with initial clump IDs */
  304. if (write(cfd, prev_clump + 1, csize) != csize)
  305. G_fatal_error(_("Unable to write to temp file"));
  306. G_percent(1, 1, 1);
  307. /* free */
  308. G_free(prev_clump);
  309. G_free(cur_clump);
  310. G_free(prev_in);
  311. G_free(cur_in);
  312. do_renumber(in_fd, NULL, 1, diag, minsize, cfd, label, index, out_fd);
  313. close(cfd);
  314. unlink(cname);
  315. print_time(&cur_time);
  316. return 0;
  317. }
  318. static double get_diff2(DCELL **a, int acol, DCELL **b, int bcol, DCELL *rng, int n)
  319. {
  320. int i;
  321. double diff, diff2;
  322. diff2 = 0;
  323. for (i = 0; i < n; i++) {
  324. if (Rast_is_d_null_value(&b[i][bcol]))
  325. return 2;
  326. diff = a[i][acol] - b[i][bcol];
  327. /* normalize with the band's range */
  328. if (rng[i])
  329. diff /= rng[i];
  330. diff2 += diff * diff;
  331. }
  332. /* normalize difference to the range [0, 1] */
  333. diff2 /= n;
  334. return diff2;
  335. }
  336. CELL clump_n(int *in_fd, char **inname, int nin, double threshold,
  337. int out_fd, int diag, int minsize)
  338. {
  339. register int col;
  340. register int i, n;
  341. /* input */
  342. DCELL **prev_in, **cur_in, **temp_in;
  343. int bcol;
  344. DCELL *rng, maxdiff;
  345. double thresh2;
  346. /* output */
  347. CELL OLD, NEW;
  348. CELL *temp_clump;
  349. CELL *prev_clump, *cur_clump;
  350. CELL *index;
  351. CELL label;
  352. int nrows, ncols;
  353. int row;
  354. int isnull;
  355. int len;
  356. int nalloc;
  357. long cur_time;
  358. char *cname;
  359. int cfd, csize;
  360. G_message(_("%d-band clumping with threshold %g"), nin, threshold);
  361. nrows = Rast_window_rows();
  362. ncols = Rast_window_cols();
  363. thresh2 = threshold * threshold;
  364. /* allocate clump index */
  365. nalloc = INCR;
  366. index = (CELL *) G_malloc(nalloc * sizeof(CELL));
  367. index[0] = 0;
  368. /* allocate DCELL buffers two columns larger than current window */
  369. len = (ncols + 2) * sizeof(DCELL);
  370. prev_in = (DCELL **) G_malloc(sizeof(DCELL *) * nin);
  371. cur_in = (DCELL **) G_malloc(sizeof(DCELL *) * nin);
  372. rng = G_malloc(sizeof(DCELL) * nin);
  373. maxdiff = 0;
  374. for (i = 0; i < nin; i++) {
  375. struct FPRange fp_range; /* min/max values of each input raster */
  376. DCELL min, max;
  377. if (Rast_read_fp_range(inname[i], "", &fp_range) != 1)
  378. G_fatal_error(_("No min/max found in raster map <%s>"),
  379. inname[i]);
  380. Rast_get_fp_range_min_max(&fp_range, &min, &max);
  381. rng[i] = max - min;
  382. maxdiff += rng[i] * rng[i];
  383. prev_in[i] = (DCELL *) G_malloc(len);
  384. cur_in[i] = (DCELL *) G_malloc(len);
  385. /* fake a previous row which is all NULL */
  386. Rast_set_d_null_value(prev_in[i], ncols + 2);
  387. /* set left and right edge to NULL */
  388. Rast_set_d_null_value(&cur_in[i][0], 1);
  389. Rast_set_d_null_value(&cur_in[i][ncols + 1], 1);
  390. }
  391. G_debug(1, "maximum possible difference: %g", maxdiff);
  392. /* allocate CELL buffers two columns larger than current window */
  393. len = (ncols + 2) * sizeof(CELL);
  394. prev_clump = (CELL *) G_malloc(len);
  395. cur_clump = (CELL *) G_malloc(len);
  396. /* temp file for initial clump IDs */
  397. cname = G_tempfile();
  398. if ((cfd = open(cname, O_RDWR | O_CREAT | O_EXCL, 0600)) < 0)
  399. G_fatal_error(_("Unable to open temp file"));
  400. csize = ncols * sizeof(CELL);
  401. time(&cur_time);
  402. /* initialize clump labels */
  403. G_zero(cur_clump, len);
  404. G_zero(prev_clump, len);
  405. label = 0;
  406. /****************************************************
  407. * PASS 1 *
  408. * pass thru the input, create initial clump labels *
  409. ****************************************************/
  410. G_message(_("Pass 1 of 2..."));
  411. for (row = 0; row < nrows; row++) {
  412. G_percent(row, nrows, 2);
  413. for (i = 0; i < nin; i++) {
  414. Rast_get_d_row(in_fd[i], cur_in[i] + 1, row);
  415. }
  416. for (col = 1; col <= ncols; col++) {
  417. isnull = 0;
  418. for (i = 0; i < nin; i++) {
  419. if (Rast_is_d_null_value(&cur_in[i][col])) { /* don't clump NULL data */
  420. cur_clump[col] = 0;
  421. isnull = 1;
  422. break;
  423. }
  424. }
  425. if (isnull)
  426. continue;
  427. /*
  428. * if the cell values are different to the left and above
  429. * (diagonal: and above left and above right)
  430. * then we must start a new clump
  431. *
  432. * this new clump may eventually collide with another
  433. * clump and will have to be merged
  434. */
  435. /* try to connect the current cell to an existing clump */
  436. OLD = NEW = 0;
  437. /* same clump as to the left */
  438. if (get_diff2(cur_in, col, cur_in, col - 1, rng, nin) <= thresh2) {
  439. OLD = cur_clump[col] = cur_clump[col - 1];
  440. }
  441. if (diag) {
  442. /* check above right, center, left, in that order */
  443. temp_clump = prev_clump + col + 1;
  444. bcol = col + 1;
  445. do {
  446. if (get_diff2(cur_in, col, prev_in, bcol, rng, nin) <= thresh2) {
  447. cur_clump[col] = *temp_clump;
  448. if (OLD == 0) {
  449. OLD = *temp_clump;
  450. }
  451. else {
  452. NEW = *temp_clump;
  453. /* threshold > 0 and diagonal requires a bit of extra work
  454. * because of bridge cells:
  455. * A similar to B, B similar to C, but A not similar to C
  456. * -> B is bridge cell */
  457. if (NEW != OLD) {
  458. CELL *temp_clump2;
  459. /* conflict! preserve NEW clump ID and change OLD clump ID.
  460. * Must go back to the left in the current row and to the right
  461. * in the previous row to change all the clump values as well.
  462. */
  463. /* left of the current row from 1 to col - 1 */
  464. temp_clump2 = cur_clump;
  465. n = col - 1;
  466. while (n-- > 0) {
  467. temp_clump2++; /* skip left edge */
  468. if (*temp_clump2 == OLD)
  469. *temp_clump2 = NEW;
  470. }
  471. /* right of previous row from col - 1 to ncols */
  472. temp_clump2 = prev_clump + col - 1;
  473. n = ncols - col + 2;
  474. while (n-- > 0) {
  475. if (*temp_clump2 == OLD)
  476. *temp_clump2 = NEW;
  477. temp_clump2++;
  478. }
  479. /* modify the OLD index */
  480. index[OLD] = NEW;
  481. OLD = NEW;
  482. NEW = 0;
  483. }
  484. }
  485. }
  486. temp_clump--;
  487. } while (bcol-- > col - 1);
  488. }
  489. else {
  490. /* check above */
  491. if (get_diff2(cur_in, col, prev_in, col, rng, nin) <= thresh2) {
  492. temp_clump = prev_clump + col;
  493. cur_clump[col] = *temp_clump;
  494. if (OLD == 0) {
  495. OLD = *temp_clump;
  496. }
  497. else {
  498. NEW = *temp_clump;
  499. if (NEW != OLD) {
  500. /* conflict! preserve NEW clump ID and change OLD clump ID.
  501. * Must go back to the left in the current row and to the right
  502. * in the previous row to change all the clump values as well.
  503. */
  504. /* left of the current row from 1 to col - 1 */
  505. temp_clump = cur_clump;
  506. n = col - 1;
  507. while (n-- > 0) {
  508. temp_clump++; /* skip left edge */
  509. if (*temp_clump == OLD)
  510. *temp_clump = NEW;
  511. }
  512. /* right of previous row from col + 1 to ncols */
  513. temp_clump = prev_clump + col;
  514. n = ncols - col;
  515. while (n-- > 0) {
  516. temp_clump++; /* skip col */
  517. if (*temp_clump == OLD)
  518. *temp_clump = NEW;
  519. }
  520. /* modify the OLD index */
  521. index[OLD] = NEW;
  522. OLD = NEW;
  523. NEW = 0;
  524. }
  525. }
  526. }
  527. }
  528. if (NEW == 0 || OLD == NEW) { /* ok */
  529. if (OLD == 0) {
  530. /* start a new clump */
  531. label++;
  532. cur_clump[col] = label;
  533. if (label >= nalloc) {
  534. nalloc += INCR;
  535. index =
  536. (CELL *) G_realloc(index,
  537. nalloc * sizeof(CELL));
  538. }
  539. index[label] = label;
  540. }
  541. }
  542. /* else the relabelling above failed */
  543. }
  544. /* write initial clump IDs */
  545. /* this works also with writing out cur_clump, but only
  546. * prev_clump is complete and will not change any more */
  547. if (row > 0) {
  548. if (write(cfd, prev_clump + 1, csize) != csize)
  549. G_fatal_error(_("Unable to write to temp file"));
  550. }
  551. /* switch the buffers so that the current buffer becomes the previous */
  552. temp_in = cur_in;
  553. cur_in = prev_in;
  554. prev_in = temp_in;
  555. temp_clump = cur_clump;
  556. cur_clump = prev_clump;
  557. prev_clump = temp_clump;
  558. }
  559. /* write last row with initial clump IDs */
  560. if (write(cfd, prev_clump + 1, csize) != csize)
  561. G_fatal_error(_("Unable to write to temp file"));
  562. G_percent(1, 1, 1);
  563. /* free */
  564. G_free(prev_clump);
  565. G_free(cur_clump);
  566. for (i = 0; i < nin; i++) {
  567. G_free(prev_in[i]);
  568. G_free(cur_in[i]);
  569. }
  570. G_free(prev_in);
  571. G_free(cur_in);
  572. do_renumber(in_fd, rng, nin, diag, minsize, cfd, label, index, out_fd);
  573. close(cfd);
  574. unlink(cname);
  575. print_time(&cur_time);
  576. return 0;
  577. }
  578. int print_time(long *start)
  579. {
  580. int hours, minutes, seconds;
  581. long done;
  582. time(&done);
  583. seconds = done - *start;
  584. *start = done;
  585. hours = seconds / 3600;
  586. minutes = (seconds - hours * 3600) / 60;
  587. seconds = seconds % 60;
  588. if (hours)
  589. G_verbose_message("%2d:%02d:%02d", hours, minutes, seconds);
  590. else if (minutes)
  591. G_verbose_message("%d:%02d", minutes, seconds);
  592. else
  593. G_verbose_message("%d seconds", seconds);
  594. return 0;
  595. }