split.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638
  1. /***********************************************************************
  2. * MODULE: R-Tree library
  3. *
  4. * AUTHOR(S): Antonin Guttman - original code
  5. * Daniel Green (green@superliminal.com) - major clean-up
  6. * and implementation of bounding spheres
  7. * Markus Metz - R*-tree
  8. *
  9. * PURPOSE: Multidimensional index
  10. *
  11. * COPYRIGHT: (C) 2009 by the GRASS Development Team
  12. *
  13. * This program is free software under the GNU General
  14. * Public License (>=v2). Read the file COPYING that comes
  15. * with GRASS for details.
  16. *
  17. ***********************************************************************/
  18. #include <stdio.h>
  19. #include <assert.h>
  20. #include <float.h>
  21. #include "index.h"
  22. #include "card.h"
  23. #include "split.h"
  24. #ifndef DBL_MAX
  25. #define DBL_MAX 1.797693E308 /* DBL_MAX approximation */
  26. #endif
  27. struct Branch BranchBuf[MAXCARD + 1];
  28. int BranchCount;
  29. struct Rect CoverSplit;
  30. RectReal CoverSplitArea;
  31. /* variables for finding a partition */
  32. struct PartitionVars Partitions[METHODS];
  33. /*----------------------------------------------------------------------
  34. | Load branch buffer with branches from full node plus the extra branch.
  35. ----------------------------------------------------------------------*/
  36. static void RTreeGetBranches(struct Node *n, struct Branch *b,
  37. struct RTree *t)
  38. {
  39. int i, maxkids = 0;
  40. assert(n);
  41. assert(b);
  42. if ((n)->level > 0) {
  43. maxkids = t->nodecard;
  44. /* load the branch buffer */
  45. for (i = 0; i < maxkids; i++) {
  46. assert(n->branch[i].child.ptr); /* n should have every entry full */
  47. BranchBuf[i] = n->branch[i];
  48. }
  49. }
  50. else {
  51. maxkids = t->leafcard;
  52. /* load the branch buffer */
  53. for (i = 0; i < maxkids; i++) {
  54. assert(n->branch[i].child.id); /* n should have every entry full */
  55. BranchBuf[i] = n->branch[i];
  56. }
  57. }
  58. BranchBuf[maxkids] = *b;
  59. BranchCount = maxkids + 1;
  60. /* calculate rect containing all in the set */
  61. CoverSplit = BranchBuf[0].rect;
  62. for (i = 1; i < maxkids + 1; i++) {
  63. CoverSplit = RTreeCombineRect(&CoverSplit, &BranchBuf[i].rect, t);
  64. }
  65. CoverSplitArea = RTreeRectSphericalVolume(&CoverSplit, t);
  66. RTreeInitNode(n, n->level);
  67. }
  68. /*----------------------------------------------------------------------
  69. | Put a branch in one of the groups.
  70. ----------------------------------------------------------------------*/
  71. static void RTreeClassify(int i, int group, struct PartitionVars *p,
  72. struct RTree *t)
  73. {
  74. assert(p);
  75. assert(!p->taken[i]);
  76. p->partition[i] = group;
  77. p->taken[i] = TRUE;
  78. if (p->count[group] == 0)
  79. p->cover[group] = BranchBuf[i].rect;
  80. else
  81. p->cover[group] =
  82. RTreeCombineRect(&BranchBuf[i].rect, &p->cover[group], t);
  83. p->area[group] = RTreeRectSphericalVolume(&p->cover[group], t);
  84. p->count[group]++;
  85. }
  86. /**********************************************************************
  87. * *
  88. * Toni Guttman's quadratic splitting method *
  89. * *
  90. **********************************************************************/
  91. /*----------------------------------------------------------------------
  92. | Pick two rects from set to be the first elements of the two groups.
  93. | Pick the two that waste the most area if covered by a single
  94. | rectangle.
  95. ----------------------------------------------------------------------*/
  96. static void RTreePickSeeds(struct PartitionVars *p, struct RTree *t)
  97. {
  98. int i, j, seed0 = 0, seed1 = 0;
  99. RectReal worst, waste, area[MAXCARD + 1];
  100. for (i = 0; i < p->total; i++)
  101. area[i] = RTreeRectSphericalVolume(&BranchBuf[i].rect, t);
  102. worst = -CoverSplitArea - 1;
  103. for (i = 0; i < p->total - 1; i++) {
  104. for (j = i + 1; j < p->total; j++) {
  105. struct Rect one_rect;
  106. one_rect = RTreeCombineRect(&BranchBuf[i].rect,
  107. &BranchBuf[j].rect, t);
  108. waste =
  109. RTreeRectSphericalVolume(&one_rect, t) - area[i] - area[j];
  110. if (waste > worst) {
  111. worst = waste;
  112. seed0 = i;
  113. seed1 = j;
  114. }
  115. }
  116. }
  117. RTreeClassify(seed0, 0, p, t);
  118. RTreeClassify(seed1, 1, p, t);
  119. }
  120. /*----------------------------------------------------------------------
  121. | Copy branches from the buffer into two nodes according to the
  122. | partition.
  123. ----------------------------------------------------------------------*/
  124. static void RTreeLoadNodes(struct Node *n, struct Node *q,
  125. struct PartitionVars *p, struct RTree *t)
  126. {
  127. int i;
  128. assert(n);
  129. assert(q);
  130. assert(p);
  131. assert(t);
  132. for (i = 0; i < p->total; i++) {
  133. assert(p->partition[i] == 0 || p->partition[i] == 1);
  134. if (p->partition[i] == 0)
  135. RTreeAddBranch(&BranchBuf[i], n, NULL, NULL, NULL, NULL, t);
  136. else if (p->partition[i] == 1)
  137. RTreeAddBranch(&BranchBuf[i], q, NULL, NULL, NULL, NULL, t);
  138. }
  139. }
  140. /*----------------------------------------------------------------------
  141. | Initialize a PartitionVars structure.
  142. ----------------------------------------------------------------------*/
  143. void RTreeInitPVars(struct PartitionVars *p, int maxrects, int minfill)
  144. {
  145. int i;
  146. assert(p);
  147. p->count[0] = p->count[1] = 0;
  148. p->cover[0] = p->cover[1] = RTreeNullRect();
  149. p->area[0] = p->area[1] = (RectReal) 0;
  150. p->total = maxrects;
  151. p->minfill = minfill;
  152. for (i = 0; i < maxrects; i++) {
  153. p->taken[i] = FALSE;
  154. p->partition[i] = -1;
  155. }
  156. }
  157. /*----------------------------------------------------------------------
  158. | Print out data for a partition from PartitionVars struct.
  159. | Unused, for debugging only
  160. ----------------------------------------------------------------------*/
  161. static void RTreePrintPVars(struct PartitionVars *p)
  162. {
  163. int i;
  164. assert(p);
  165. fprintf(stdout, "\npartition:\n");
  166. for (i = 0; i < p->total; i++) {
  167. fprintf(stdout, "%3d\t", i);
  168. }
  169. fprintf(stdout, "\n");
  170. for (i = 0; i < p->total; i++) {
  171. if (p->taken[i])
  172. fprintf(stdout, " t\t");
  173. else
  174. fprintf(stdout, "\t");
  175. }
  176. fprintf(stdout, "\n");
  177. for (i = 0; i < p->total; i++) {
  178. fprintf(stdout, "%3d\t", p->partition[i]);
  179. }
  180. fprintf(stdout, "\n");
  181. fprintf(stdout, "count[0] = %d area = %f\n", p->count[0], p->area[0]);
  182. fprintf(stdout, "count[1] = %d area = %f\n", p->count[1], p->area[1]);
  183. if (p->area[0] + p->area[1] > 0) {
  184. fprintf(stdout, "total area = %f effectiveness = %3.2f\n",
  185. p->area[0] + p->area[1],
  186. (float)CoverSplitArea / (p->area[0] + p->area[1]));
  187. }
  188. fprintf(stdout, "cover[0]:\n");
  189. RTreePrintRect(&p->cover[0], 0);
  190. fprintf(stdout, "cover[1]:\n");
  191. RTreePrintRect(&p->cover[1], 0);
  192. }
  193. /*----------------------------------------------------------------------
  194. | Method #0 for choosing a partition: this is Toni Guttman's quadratic
  195. | split
  196. |
  197. | As the seeds for the two groups, pick the two rects that would waste
  198. | the most area if covered by a single rectangle, i.e. evidently the
  199. | worst pair to have in the same group. Of the remaining, one at a time
  200. | is chosen to be put in one of the two groups. The one chosen is the
  201. | one with the greatest difference in area expansion depending on which
  202. | group - the rect most strongly attracted to one group and repelled
  203. | from the other. If one group gets too full (more would force other
  204. | group to violate min fill requirement) then other group gets the rest.
  205. | These last are the ones that can go in either group most easily.
  206. ----------------------------------------------------------------------*/
  207. static void RTreeMethodZero(struct PartitionVars *p, int minfill,
  208. struct RTree *t)
  209. {
  210. int i;
  211. RectReal biggestDiff;
  212. int group, chosen = 0, betterGroup = 0;
  213. assert(p);
  214. RTreeInitPVars(p, BranchCount, minfill);
  215. RTreePickSeeds(p, t);
  216. while (p->count[0] + p->count[1] < p->total
  217. && p->count[0] < p->total - p->minfill
  218. && p->count[1] < p->total - p->minfill) {
  219. biggestDiff = (RectReal) - 1.;
  220. for (i = 0; i < p->total; i++) {
  221. if (!p->taken[i]) {
  222. struct Rect *r, rect_0, rect_1;
  223. RectReal growth0, growth1, diff;
  224. r = &BranchBuf[i].rect;
  225. rect_0 = RTreeCombineRect(r, &p->cover[0], t);
  226. rect_1 = RTreeCombineRect(r, &p->cover[1], t);
  227. growth0 = RTreeRectSphericalVolume(&rect_0, t) - p->area[0];
  228. growth1 = RTreeRectSphericalVolume(&rect_1, t) - p->area[1];
  229. diff = growth1 - growth0;
  230. if (diff >= 0)
  231. group = 0;
  232. else {
  233. group = 1;
  234. diff = -diff;
  235. }
  236. if (diff > biggestDiff) {
  237. biggestDiff = diff;
  238. chosen = i;
  239. betterGroup = group;
  240. }
  241. else if (diff == biggestDiff &&
  242. p->count[group] < p->count[betterGroup]) {
  243. chosen = i;
  244. betterGroup = group;
  245. }
  246. }
  247. }
  248. RTreeClassify(chosen, betterGroup, p, t);
  249. }
  250. /* if one group too full, put remaining rects in the other */
  251. if (p->count[0] + p->count[1] < p->total) {
  252. if (p->count[0] >= p->total - p->minfill)
  253. group = 1;
  254. else
  255. group = 0;
  256. for (i = 0; i < p->total; i++) {
  257. if (!p->taken[i])
  258. RTreeClassify(i, group, p, t);
  259. }
  260. }
  261. assert(p->count[0] + p->count[1] == p->total);
  262. assert(p->count[0] >= p->minfill && p->count[1] >= p->minfill);
  263. }
  264. /**********************************************************************
  265. * *
  266. * Norbert Beckmann's R*-tree splitting method *
  267. * *
  268. **********************************************************************/
  269. /*----------------------------------------------------------------------
  270. | swap branches
  271. ----------------------------------------------------------------------*/
  272. static void RTreeSwapBranches(struct Branch *a, struct Branch *b)
  273. {
  274. struct Branch c;
  275. c = *a;
  276. *a = *b;
  277. *b = c;
  278. }
  279. /*----------------------------------------------------------------------
  280. | compare branches for given rectangle side
  281. | return 1 if a > b
  282. | return 0 if a == b
  283. | return -1 if a < b
  284. ----------------------------------------------------------------------*/
  285. static int RTreeCompareBranches(struct Branch *a, struct Branch *b, int side)
  286. {
  287. if (a->rect.boundary[side] > b->rect.boundary[side])
  288. return 1;
  289. else if (a->rect.boundary[side] < b->rect.boundary[side])
  290. return -1;
  291. /* boundaries are equal */
  292. return 0;
  293. }
  294. /*----------------------------------------------------------------------
  295. | check if BranchBuf is sorted along given axis (dimension)
  296. ----------------------------------------------------------------------*/
  297. static int RTreeBranchBufIsSorted(int first, int last, int side)
  298. {
  299. int i;
  300. for (i = first; i < last; i++) {
  301. if (RTreeCompareBranches(&(BranchBuf[i]), &(BranchBuf[i + 1]), side)
  302. == 1)
  303. return 0;
  304. }
  305. return 1;
  306. }
  307. /*----------------------------------------------------------------------
  308. | partition BranchBuf for quicksort along given axis (dimension)
  309. ----------------------------------------------------------------------*/
  310. static int RTreePartitionBranchBuf(int first, int last, int side)
  311. {
  312. int pivot, mid = (first + last) / 2;
  313. int larger, smaller;
  314. if (last - first == 1) { /* only two items in list */
  315. if (RTreeCompareBranches
  316. (&(BranchBuf[first]), &(BranchBuf[last]), side) == 1) {
  317. RTreeSwapBranches(&(BranchBuf[first]), &(BranchBuf[last]));
  318. }
  319. return last;
  320. }
  321. /* Larger of two */
  322. if (RTreeCompareBranches(&(BranchBuf[first]), &(BranchBuf[mid]), side) ==
  323. 1) {
  324. larger = pivot = first;
  325. smaller = mid;
  326. }
  327. else {
  328. larger = pivot = mid;
  329. smaller = first;
  330. }
  331. if (RTreeCompareBranches(&(BranchBuf[larger]), &(BranchBuf[last]), side)
  332. == 1) {
  333. /* larger is largest, get the larger of smaller and last */
  334. if (RTreeCompareBranches
  335. (&(BranchBuf[smaller]), &(BranchBuf[last]), side) == 1) {
  336. pivot = smaller;
  337. }
  338. else {
  339. pivot = last;
  340. }
  341. }
  342. if (pivot != last) {
  343. RTreeSwapBranches(&(BranchBuf[pivot]), &(BranchBuf[last]));
  344. }
  345. pivot = first;
  346. while (first < last) {
  347. if (RTreeCompareBranches
  348. (&(BranchBuf[first]), &(BranchBuf[last]), side) != 1) {
  349. if (pivot != first) {
  350. RTreeSwapBranches(&(BranchBuf[pivot]), &(BranchBuf[first]));
  351. }
  352. pivot++;
  353. }
  354. ++first;
  355. }
  356. if (pivot != last) {
  357. RTreeSwapBranches(&(BranchBuf[pivot]), &(BranchBuf[last]));
  358. }
  359. return pivot;
  360. }
  361. /*----------------------------------------------------------------------
  362. | quicksort BranchBuf along given side
  363. ----------------------------------------------------------------------*/
  364. static void RTreeQuicksortBranchBuf(int side)
  365. {
  366. int pivot, first, last;
  367. int s_first[MAXCARD + 1], s_last[MAXCARD + 1], stacksize;
  368. s_first[0] = 0;
  369. s_last[0] = BranchCount - 1;
  370. stacksize = 1;
  371. /* use stack */
  372. while (stacksize) {
  373. stacksize--;
  374. first = s_first[stacksize];
  375. last = s_last[stacksize];
  376. if (first < last) {
  377. if (!RTreeBranchBufIsSorted(first, last, side)) {
  378. pivot = RTreePartitionBranchBuf(first, last, side);
  379. s_first[stacksize] = first;
  380. s_last[stacksize] = pivot - 1;
  381. stacksize++;
  382. s_first[stacksize] = pivot + 1;
  383. s_last[stacksize] = last;
  384. stacksize++;
  385. }
  386. }
  387. }
  388. }
  389. /*----------------------------------------------------------------------
  390. | Method #1 for choosing a partition: this is the R*-tree method
  391. |
  392. | Pick the axis with the smallest margin increase (keep rectangles
  393. | square).
  394. | Along the chosen split axis, choose the distribution with the mimimum
  395. | overlap-value. Resolve ties by choosing the distribution with the
  396. | mimimum area-value.
  397. | If one group gets too full (more would force other group to violate min
  398. | fill requirement) then other group gets the rest.
  399. | These last are the ones that can go in either group most easily.
  400. ----------------------------------------------------------------------*/
  401. static void RTreeMethodOne(struct PartitionVars *p, int minfill,
  402. struct RTree *t)
  403. {
  404. int i, j, k, l, s, maxkids;
  405. int axis = 0, best_axis = 0, side = 0, best_side[NUMDIMS];
  406. int best_cut[NUMDIMS];
  407. RectReal margin, smallest_margin = 0;
  408. struct Rect *r1, *r2, testrect1, testrect2, upperrect, orect;
  409. int minfill1 = minfill - 1;
  410. RectReal overlap, vol, smallest_overlap = -1, smallest_vol = -1;
  411. assert(p);
  412. RTreeInitPVars(p, BranchCount, minfill);
  413. maxkids = (minfill == t->min_leaf_fill ? t->leafcard : t->nodecard);
  414. margin = DBL_MAX;
  415. /* choose split axis */
  416. /* For each dimension, sort rectangles first by lower boundary then
  417. * by upper boundary. Get the smallest margin. */
  418. for (i = 0; i < t->ndims; i++) {
  419. axis = i;
  420. best_cut[i] = 0;
  421. best_side[i] = 0;
  422. smallest_overlap = DBL_MAX;
  423. smallest_vol = DBL_MAX;
  424. /* first lower then upper bounds for each axis */
  425. for (s = 0; s < 2; s++) {
  426. RTreeQuicksortBranchBuf(i + s * NUMDIMS);
  427. side = s;
  428. testrect1 = BranchBuf[0].rect;
  429. upperrect = BranchBuf[maxkids].rect;
  430. for (j = 1; j < minfill1; j++) {
  431. r1 = &BranchBuf[j].rect;
  432. testrect1 = RTreeCombineRect(&testrect1, r1, t);
  433. r2 = &BranchBuf[maxkids - j].rect;
  434. upperrect = RTreeCombineRect(&upperrect, r2, t);
  435. }
  436. r2 = &BranchBuf[maxkids - minfill1].rect;
  437. upperrect = RTreeCombineRect(&upperrect, r2, t);
  438. /* check distributions for this axis, adhere the minimum node fill */
  439. for (j = minfill1; j < BranchCount - minfill; j++) {
  440. r1 = &BranchBuf[j].rect;
  441. testrect1 = RTreeCombineRect(&testrect1, r1, t);
  442. testrect2 = upperrect;
  443. for (k = j + 1; k < BranchCount - minfill; k++) {
  444. r2 = &BranchBuf[k].rect;
  445. testrect2 = RTreeCombineRect(&testrect2, r2, t);
  446. }
  447. /* the margin is the sum of the lengths of the edges of a rectangle */
  448. margin =
  449. RTreeRectMargin(&testrect1,
  450. t) + RTreeRectMargin(&testrect2, t);
  451. /* remember best axis */
  452. if (margin <= smallest_margin) {
  453. smallest_margin = margin;
  454. best_axis = i;
  455. }
  456. /* remember best distribution for this axis */
  457. /* overlap size */
  458. overlap = 1;
  459. for (k = 0; k < t->ndims; k++) {
  460. /* no overlap */
  461. if (testrect1.boundary[k] >
  462. testrect2.boundary[k + NUMDIMS] ||
  463. testrect1.boundary[k + NUMDIMS] <
  464. testrect2.boundary[k]) {
  465. overlap = 0;
  466. break;
  467. }
  468. /* get overlap */
  469. else {
  470. if (testrect1.boundary[k] > testrect2.boundary[k])
  471. orect.boundary[k] = testrect1.boundary[k];
  472. else
  473. orect.boundary[k] = testrect2.boundary[k];
  474. l = k + NUMDIMS;
  475. if (testrect1.boundary[l] < testrect2.boundary[l])
  476. orect.boundary[l] = testrect1.boundary[l];
  477. else
  478. orect.boundary[l] = testrect2.boundary[l];
  479. }
  480. }
  481. if (overlap)
  482. overlap = RTreeRectVolume(&orect, t);
  483. vol =
  484. RTreeRectVolume(&testrect1,
  485. t) + RTreeRectVolume(&testrect2, t);
  486. /* get best cut for this axis */
  487. if (overlap <= smallest_overlap) {
  488. smallest_overlap = overlap;
  489. smallest_vol = vol;
  490. best_cut[i] = j;
  491. best_side[i] = s;
  492. }
  493. else if (overlap == smallest_overlap) {
  494. /* resolve ties by minimum volume */
  495. if (vol <= smallest_vol) {
  496. smallest_vol = vol;
  497. best_cut[i] = j;
  498. best_side[i] = s;
  499. }
  500. }
  501. } /* end of distribution check */
  502. } /* end of side check */
  503. } /* end of axis check */
  504. /* Use best distribution to classify branches */
  505. if (best_axis != axis || best_side[best_axis] != side)
  506. RTreeQuicksortBranchBuf(best_axis + best_side[best_axis] * NUMDIMS);
  507. best_cut[best_axis]++;
  508. for (i = 0; i < best_cut[best_axis]; i++)
  509. RTreeClassify(i, 0, p, t);
  510. for (i = best_cut[best_axis]; i < BranchCount; i++)
  511. RTreeClassify(i, 1, p, t);
  512. assert(p->count[0] + p->count[1] == p->total);
  513. assert(p->count[0] >= p->minfill && p->count[1] >= p->minfill);
  514. }
  515. /*----------------------------------------------------------------------
  516. | Split a node.
  517. | Divides the nodes branches and the extra one between two nodes.
  518. | Old node is one of the new ones, and one really new one is created.
  519. | May use quadratic split or R*-tree split.
  520. ----------------------------------------------------------------------*/
  521. void RTreeSplitNode(struct Node *n, struct Branch *b, struct Node *nn,
  522. struct RTree *t)
  523. {
  524. struct PartitionVars *p;
  525. int level;
  526. assert(n);
  527. assert(b);
  528. /* load all the branches into a buffer, initialize old node */
  529. level = n->level;
  530. RTreeGetBranches(n, b, t);
  531. /* find partition */
  532. p = &Partitions[0];
  533. /* Note: can't use MINFILL(n) below since n was cleared by GetBranches() */
  534. /* RTreeMethodZero(p, level > 0 ? t->min_node_fill : t->min_leaf_fill, t); */
  535. RTreeMethodOne(p, level > 0 ? t->min_node_fill : t->min_leaf_fill, t);
  536. /*
  537. * put branches from buffer into 2 nodes
  538. * according to chosen partition
  539. */
  540. (nn)->level = n->level = level;
  541. RTreeLoadNodes(n, nn, p, t);
  542. assert(n->count + nn->count == p->total);
  543. }