split.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. /***********************************************************************
  2. * MODULE: R-Tree library
  3. *
  4. * AUTHOR(S): Antonin Guttman - original code
  5. * Daniel Green (green@superliminal.com) - major clean-up
  6. * and implementation of bounding spheres
  7. * Markus Metz - file-based and memory-based R*-tree
  8. *
  9. * PURPOSE: Multidimensional index
  10. *
  11. * COPYRIGHT: (C) 2001 by the GRASS Development Team
  12. *
  13. * This program is free software under the GNU General
  14. * Public License (>=v2). Read the file COPYING that comes
  15. * with GRASS for details.
  16. *
  17. ***********************************************************************/
  18. #include <stdio.h>
  19. #include <assert.h>
  20. #include <float.h>
  21. #include "index.h"
  22. #include "card.h"
  23. #include "split.h"
  24. #ifndef DBL_MAX
  25. #define DBL_MAX 1.797693E308 /* DBL_MAX approximation */
  26. #endif
  27. struct RTree_Branch BranchBuf[MAXCARD + 1];
  28. int BranchCount;
  29. struct RTree_Rect CoverSplit;
  30. RectReal CoverSplitArea;
  31. /* variables for finding a partition */
  32. struct RTree_PartitionVars Partitions[1];
  33. /*----------------------------------------------------------------------
  34. | Load branch buffer with branches from full node plus the extra branch.
  35. ----------------------------------------------------------------------*/
  36. static void RTreeGetBranches(struct RTree_Node *n, struct RTree_Branch *b,
  37. struct RTree *t)
  38. {
  39. int i, maxkids = 0;
  40. if ((n)->level > 0) {
  41. maxkids = t->nodecard;
  42. /* load the branch buffer */
  43. for (i = 0; i < maxkids; i++) {
  44. assert(t->valid_child(&(n->branch[i].child))); /* n should have every entry full */
  45. BranchBuf[i] = n->branch[i];
  46. }
  47. }
  48. else {
  49. maxkids = t->leafcard;
  50. /* load the branch buffer */
  51. for (i = 0; i < maxkids; i++) {
  52. assert(n->branch[i].child.id); /* n should have every entry full */
  53. BranchBuf[i] = n->branch[i];
  54. }
  55. }
  56. BranchBuf[maxkids] = *b;
  57. BranchCount = maxkids + 1;
  58. if (METHOD == 0) { /* quadratic split */
  59. /* calculate rect containing all in the set */
  60. CoverSplit = BranchBuf[0].rect;
  61. for (i = 1; i < maxkids + 1; i++) {
  62. CoverSplit = RTreeCombineRect(&CoverSplit, &BranchBuf[i].rect, t);
  63. }
  64. CoverSplitArea = RTreeRectSphericalVolume(&CoverSplit, t);
  65. }
  66. RTreeInitNode(n, NODETYPE(n->level, t->fd));
  67. }
  68. /*----------------------------------------------------------------------
  69. | Put a branch in one of the groups.
  70. ----------------------------------------------------------------------*/
  71. static void RTreeClassify(int i, int group, struct RTree_PartitionVars *p,
  72. struct RTree *t)
  73. {
  74. assert(!p->taken[i]);
  75. p->partition[i] = group;
  76. p->taken[i] = TRUE;
  77. if (METHOD == 0) {
  78. if (p->count[group] == 0)
  79. p->cover[group] = BranchBuf[i].rect;
  80. else
  81. p->cover[group] =
  82. RTreeCombineRect(&BranchBuf[i].rect, &p->cover[group], t);
  83. p->area[group] = RTreeRectSphericalVolume(&p->cover[group], t);
  84. }
  85. p->count[group]++;
  86. }
  87. /***************************************************
  88. * *
  89. * Toni Guttman's quadratic splitting method *
  90. * *
  91. ***************************************************/
  92. /*----------------------------------------------------------------------
  93. | Pick two rects from set to be the first elements of the two groups.
  94. | Pick the two that waste the most area if covered by a single
  95. | rectangle.
  96. ----------------------------------------------------------------------*/
  97. static void RTreePickSeeds(struct RTree_PartitionVars *p, struct RTree *t)
  98. {
  99. int i, j, seed0 = 0, seed1 = 0;
  100. RectReal worst, waste, area[MAXCARD + 1];
  101. for (i = 0; i < p->total; i++)
  102. area[i] = RTreeRectSphericalVolume(&BranchBuf[i].rect, t);
  103. worst = -CoverSplitArea - 1;
  104. for (i = 0; i < p->total - 1; i++) {
  105. for (j = i + 1; j < p->total; j++) {
  106. struct RTree_Rect one_rect;
  107. one_rect = RTreeCombineRect(&BranchBuf[i].rect,
  108. &BranchBuf[j].rect, t);
  109. waste =
  110. RTreeRectSphericalVolume(&one_rect, t) - area[i] - area[j];
  111. if (waste > worst) {
  112. worst = waste;
  113. seed0 = i;
  114. seed1 = j;
  115. }
  116. }
  117. }
  118. RTreeClassify(seed0, 0, p, t);
  119. RTreeClassify(seed1, 1, p, t);
  120. }
  121. /*----------------------------------------------------------------------
  122. | Copy branches from the buffer into two nodes according to the
  123. | partition.
  124. ----------------------------------------------------------------------*/
  125. static void RTreeLoadNodes(struct RTree_Node *n, struct RTree_Node *q,
  126. struct RTree_PartitionVars *p, struct RTree *t)
  127. {
  128. int i;
  129. for (i = 0; i < p->total; i++) {
  130. assert(p->partition[i] == 0 || p->partition[i] == 1);
  131. if (p->partition[i] == 0)
  132. RTreeAddBranch(&BranchBuf[i], n, NULL, NULL, NULL, NULL, t);
  133. else if (p->partition[i] == 1)
  134. RTreeAddBranch(&BranchBuf[i], q, NULL, NULL, NULL, NULL, t);
  135. }
  136. }
  137. /*----------------------------------------------------------------------
  138. | Initialize a PartitionVars structure.
  139. ----------------------------------------------------------------------*/
  140. void RTreeInitPVars(struct RTree_PartitionVars *p, int maxrects, int minfill)
  141. {
  142. int i;
  143. p->count[0] = p->count[1] = 0;
  144. p->cover[0] = p->cover[1] = RTreeNullRect();
  145. p->area[0] = p->area[1] = (RectReal) 0;
  146. p->total = maxrects;
  147. p->minfill = minfill;
  148. for (i = 0; i < maxrects; i++) {
  149. p->taken[i] = FALSE;
  150. p->partition[i] = -1;
  151. }
  152. }
  153. /*----------------------------------------------------------------------
  154. | Print out data for a partition from PartitionVars struct.
  155. | Unused, for debugging only
  156. ----------------------------------------------------------------------*/
  157. static void RTreePrintPVars(struct RTree_PartitionVars *p)
  158. {
  159. int i;
  160. fprintf(stdout, "\npartition:\n");
  161. for (i = 0; i < p->total; i++) {
  162. fprintf(stdout, "%3d\t", i);
  163. }
  164. fprintf(stdout, "\n");
  165. for (i = 0; i < p->total; i++) {
  166. if (p->taken[i])
  167. fprintf(stdout, " t\t");
  168. else
  169. fprintf(stdout, "\t");
  170. }
  171. fprintf(stdout, "\n");
  172. for (i = 0; i < p->total; i++) {
  173. fprintf(stdout, "%3d\t", p->partition[i]);
  174. }
  175. fprintf(stdout, "\n");
  176. fprintf(stdout, "count[0] = %d area = %f\n", p->count[0], p->area[0]);
  177. fprintf(stdout, "count[1] = %d area = %f\n", p->count[1], p->area[1]);
  178. if (p->area[0] + p->area[1] > 0) {
  179. fprintf(stdout, "total area = %f effectiveness = %3.2f\n",
  180. p->area[0] + p->area[1],
  181. (float)CoverSplitArea / (p->area[0] + p->area[1]));
  182. }
  183. fprintf(stdout, "cover[0]:\n");
  184. RTreePrintRect(&p->cover[0], 0);
  185. fprintf(stdout, "cover[1]:\n");
  186. RTreePrintRect(&p->cover[1], 0);
  187. }
  188. /*----------------------------------------------------------------------
  189. | Method #0 for choosing a partition: this is Toni Guttman's quadratic
  190. | split
  191. |
  192. | As the seeds for the two groups, pick the two rects that would waste
  193. | the most area if covered by a single rectangle, i.e. evidently the
  194. | worst pair to have in the same group. Of the remaining, one at a time
  195. | is chosen to be put in one of the two groups. The one chosen is the
  196. | one with the greatest difference in area expansion depending on which
  197. | group - the rect most strongly attracted to one group and repelled
  198. | from the other. If one group gets too full (more would force other
  199. | group to violate min fill requirement) then other group gets the rest.
  200. | These last are the ones that can go in either group most easily.
  201. ----------------------------------------------------------------------*/
  202. static void RTreeMethodZero(struct RTree_PartitionVars *p, int minfill,
  203. struct RTree *t)
  204. {
  205. int i;
  206. RectReal biggestDiff;
  207. int group, chosen = 0, betterGroup = 0;
  208. RTreeInitPVars(p, BranchCount, minfill);
  209. RTreePickSeeds(p, t);
  210. while (p->count[0] + p->count[1] < p->total
  211. && p->count[0] < p->total - p->minfill
  212. && p->count[1] < p->total - p->minfill) {
  213. biggestDiff = (RectReal) - 1.;
  214. for (i = 0; i < p->total; i++) {
  215. if (!p->taken[i]) {
  216. struct RTree_Rect *r, rect_0, rect_1;
  217. RectReal growth0, growth1, diff;
  218. r = &BranchBuf[i].rect;
  219. rect_0 = RTreeCombineRect(r, &p->cover[0], t);
  220. rect_1 = RTreeCombineRect(r, &p->cover[1], t);
  221. growth0 = RTreeRectSphericalVolume(&rect_0, t) - p->area[0];
  222. growth1 = RTreeRectSphericalVolume(&rect_1, t) - p->area[1];
  223. diff = growth1 - growth0;
  224. if (diff >= 0)
  225. group = 0;
  226. else {
  227. group = 1;
  228. diff = -diff;
  229. }
  230. if (diff > biggestDiff) {
  231. biggestDiff = diff;
  232. chosen = i;
  233. betterGroup = group;
  234. }
  235. else if (diff == biggestDiff &&
  236. p->count[group] < p->count[betterGroup]) {
  237. chosen = i;
  238. betterGroup = group;
  239. }
  240. }
  241. }
  242. RTreeClassify(chosen, betterGroup, p, t);
  243. }
  244. /* if one group too full, put remaining rects in the other */
  245. if (p->count[0] + p->count[1] < p->total) {
  246. if (p->count[0] >= p->total - p->minfill)
  247. group = 1;
  248. else
  249. group = 0;
  250. for (i = 0; i < p->total; i++) {
  251. if (!p->taken[i])
  252. RTreeClassify(i, group, p, t);
  253. }
  254. }
  255. assert(p->count[0] + p->count[1] == p->total);
  256. assert(p->count[0] >= p->minfill && p->count[1] >= p->minfill);
  257. }
  258. /**********************************************************************
  259. * *
  260. * Norbert Beckmann's R*-tree splitting method *
  261. * *
  262. **********************************************************************/
  263. /*----------------------------------------------------------------------
  264. | swap branches
  265. ----------------------------------------------------------------------*/
  266. static void RTreeSwapBranches(struct RTree_Branch *a, struct RTree_Branch *b)
  267. {
  268. struct RTree_Branch c;
  269. c = *a;
  270. *a = *b;
  271. *b = c;
  272. }
  273. /*----------------------------------------------------------------------
  274. | compare branches for given rectangle side
  275. | return 1 if a > b
  276. | return 0 if a == b
  277. | return -1 if a < b
  278. ----------------------------------------------------------------------*/
  279. static int RTreeCompareBranches(struct RTree_Branch *a, struct RTree_Branch *b, int side)
  280. {
  281. if (a->rect.boundary[side] > b->rect.boundary[side])
  282. return 1;
  283. else if (a->rect.boundary[side] < b->rect.boundary[side])
  284. return -1;
  285. /* boundaries are equal */
  286. return 0;
  287. }
  288. /*----------------------------------------------------------------------
  289. | check if BranchBuf is sorted along given axis (dimension)
  290. ----------------------------------------------------------------------*/
  291. static int RTreeBranchBufIsSorted(int first, int last, int side)
  292. {
  293. int i;
  294. for (i = first; i < last; i++) {
  295. if (RTreeCompareBranches(&(BranchBuf[i]), &(BranchBuf[i + 1]), side)
  296. == 1)
  297. return 0;
  298. }
  299. return 1;
  300. }
  301. /*----------------------------------------------------------------------
  302. | partition BranchBuf for quicksort along given axis (dimension)
  303. ----------------------------------------------------------------------*/
  304. static int RTreePartitionBranchBuf(int first, int last, int side)
  305. {
  306. int pivot, mid = (first + last) / 2;
  307. int larger, smaller;
  308. if (last - first == 1) { /* only two items in list */
  309. if (RTreeCompareBranches
  310. (&(BranchBuf[first]), &(BranchBuf[last]), side) == 1) {
  311. RTreeSwapBranches(&(BranchBuf[first]), &(BranchBuf[last]));
  312. }
  313. return last;
  314. }
  315. /* larger of two */
  316. if (RTreeCompareBranches(&(BranchBuf[first]), &(BranchBuf[mid]), side)
  317. == 1) {
  318. larger = pivot = first;
  319. smaller = mid;
  320. }
  321. else {
  322. larger = pivot = mid;
  323. smaller = first;
  324. }
  325. if (RTreeCompareBranches(&(BranchBuf[larger]), &(BranchBuf[last]), side)
  326. == 1) {
  327. /* larger is largest, get the larger of smaller and last */
  328. if (RTreeCompareBranches
  329. (&(BranchBuf[smaller]), &(BranchBuf[last]), side) == 1) {
  330. pivot = smaller;
  331. }
  332. else {
  333. pivot = last;
  334. }
  335. }
  336. if (pivot != last) {
  337. RTreeSwapBranches(&(BranchBuf[pivot]), &(BranchBuf[last]));
  338. }
  339. pivot = first;
  340. while (first < last) {
  341. if (RTreeCompareBranches
  342. (&(BranchBuf[first]), &(BranchBuf[last]), side) != 1) {
  343. if (pivot != first) {
  344. RTreeSwapBranches(&(BranchBuf[pivot]), &(BranchBuf[first]));
  345. }
  346. pivot++;
  347. }
  348. ++first;
  349. }
  350. if (pivot != last) {
  351. RTreeSwapBranches(&(BranchBuf[pivot]), &(BranchBuf[last]));
  352. }
  353. return pivot;
  354. }
  355. /*----------------------------------------------------------------------
  356. | quicksort BranchBuf along given side
  357. ----------------------------------------------------------------------*/
  358. static void RTreeQuicksortBranchBuf(int side)
  359. {
  360. int pivot, first, last;
  361. int s_first[MAXCARD + 1], s_last[MAXCARD + 1], stacksize;
  362. s_first[0] = 0;
  363. s_last[0] = BranchCount - 1;
  364. stacksize = 1;
  365. /* use stack */
  366. while (stacksize) {
  367. stacksize--;
  368. first = s_first[stacksize];
  369. last = s_last[stacksize];
  370. if (first < last) {
  371. if (!RTreeBranchBufIsSorted(first, last, side)) {
  372. pivot = RTreePartitionBranchBuf(first, last, side);
  373. s_first[stacksize] = first;
  374. s_last[stacksize] = pivot - 1;
  375. stacksize++;
  376. s_first[stacksize] = pivot + 1;
  377. s_last[stacksize] = last;
  378. stacksize++;
  379. }
  380. }
  381. }
  382. }
  383. /*----------------------------------------------------------------------
  384. | Method #1 for choosing a partition: this is the R*-tree method
  385. |
  386. | Pick the axis with the smallest margin increase (keep rectangles
  387. | square).
  388. | Along the chosen split axis, choose the distribution with the mimimum
  389. | overlap-value. Resolve ties by choosing the distribution with the
  390. | mimimum area-value.
  391. | If one group gets too full (more would force other group to violate min
  392. | fill requirement) then other group gets the rest.
  393. | These last are the ones that can go in either group most easily.
  394. ----------------------------------------------------------------------*/
  395. static void RTreeMethodOne(struct RTree_PartitionVars *p, int minfill,
  396. int maxkids, struct RTree *t)
  397. {
  398. int i, j, k, l, s;
  399. int axis = 0, best_axis = 0, side = 0, best_side[NUMDIMS];
  400. int best_cut[NUMDIMS];
  401. RectReal margin, smallest_margin = 0;
  402. struct RTree_Rect *r1, *r2, testrect1, testrect2, upperrect, orect;
  403. int minfill1 = minfill - 1;
  404. RectReal overlap, vol, smallest_overlap = -1, smallest_vol = -1;
  405. RTreeInitPVars(p, BranchCount, minfill);
  406. margin = DBL_MAX;
  407. /* choose split axis */
  408. /* For each dimension, sort rectangles first by lower boundary then
  409. * by upper boundary. Get the smallest margin. */
  410. for (i = 0; i < t->ndims; i++) {
  411. axis = i;
  412. best_cut[i] = 0;
  413. best_side[i] = 0;
  414. smallest_overlap = DBL_MAX;
  415. smallest_vol = DBL_MAX;
  416. /* first lower then upper bounds for each axis */
  417. for (s = 0; s < 2; s++) {
  418. RTreeQuicksortBranchBuf(i + s * NUMDIMS);
  419. side = s;
  420. testrect1 = BranchBuf[0].rect;
  421. upperrect = BranchBuf[maxkids].rect;
  422. for (j = 1; j < minfill1; j++) {
  423. r1 = &BranchBuf[j].rect;
  424. testrect1 = RTreeCombineRect(&testrect1, r1, t);
  425. r2 = &BranchBuf[maxkids - j].rect;
  426. upperrect = RTreeCombineRect(&upperrect, r2, t);
  427. }
  428. r2 = &BranchBuf[maxkids - minfill1].rect;
  429. upperrect = RTreeCombineRect(&upperrect, r2, t);
  430. /* check distributions for this axis, adhere the minimum node fill */
  431. for (j = minfill1; j < BranchCount - minfill; j++) {
  432. r1 = &BranchBuf[j].rect;
  433. testrect1 = RTreeCombineRect(&testrect1, r1, t);
  434. testrect2 = upperrect;
  435. for (k = j + 1; k < BranchCount - minfill; k++) {
  436. r2 = &BranchBuf[k].rect;
  437. testrect2 = RTreeCombineRect(&testrect2, r2, t);
  438. }
  439. /* the margin is the sum of the lengths of the edges of a rectangle */
  440. margin =
  441. RTreeRectMargin(&testrect1, t) +
  442. RTreeRectMargin(&testrect2, t);
  443. /* remember best axis */
  444. if (margin <= smallest_margin) {
  445. smallest_margin = margin;
  446. best_axis = i;
  447. }
  448. /* remember best distribution for this axis */
  449. /* overlap size */
  450. overlap = 1;
  451. for (k = 0; k < t->ndims; k++) {
  452. /* no overlap */
  453. if (testrect1.boundary[k] > testrect2.boundary[k + NUMDIMS] ||
  454. testrect1.boundary[k + NUMDIMS] < testrect2.boundary[k]) {
  455. overlap = 0;
  456. break;
  457. }
  458. /* get overlap */
  459. else {
  460. if (testrect1.boundary[k] > testrect2.boundary[k])
  461. orect.boundary[k] = testrect1.boundary[k];
  462. else
  463. orect.boundary[k] = testrect2.boundary[k];
  464. l = k + NUMDIMS;
  465. if (testrect1.boundary[l] < testrect2.boundary[l])
  466. orect.boundary[l] = testrect1.boundary[l];
  467. else
  468. orect.boundary[l] = testrect2.boundary[l];
  469. }
  470. }
  471. if (overlap)
  472. overlap = RTreeRectVolume(&orect, t);
  473. vol =
  474. RTreeRectVolume(&testrect1, t) +
  475. RTreeRectVolume(&testrect2, t);
  476. /* get best cut for this axis */
  477. if (overlap <= smallest_overlap) {
  478. smallest_overlap = overlap;
  479. smallest_vol = vol;
  480. best_cut[i] = j;
  481. best_side[i] = s;
  482. }
  483. else if (overlap == smallest_overlap) {
  484. /* resolve ties by minimum volume */
  485. if (vol <= smallest_vol) {
  486. smallest_vol = vol;
  487. best_cut[i] = j;
  488. best_side[i] = s;
  489. }
  490. }
  491. } /* end of distribution check */
  492. } /* end of side check */
  493. } /* end of axis check */
  494. /* Use best distribution to classify branches */
  495. if (best_axis != axis || best_side[best_axis] != side)
  496. RTreeQuicksortBranchBuf(best_axis + best_side[best_axis] * NUMDIMS);
  497. best_cut[best_axis]++;
  498. for (i = 0; i < best_cut[best_axis]; i++)
  499. RTreeClassify(i, 0, p, t);
  500. for (i = best_cut[best_axis]; i < BranchCount; i++)
  501. RTreeClassify(i, 1, p, t);
  502. assert(p->count[0] + p->count[1] == p->total);
  503. assert(p->count[0] >= p->minfill && p->count[1] >= p->minfill);
  504. }
  505. /*----------------------------------------------------------------------
  506. | Split a node.
  507. | Divides the nodes branches and the extra one between two nodes.
  508. | Old node is one of the new ones, and one really new one is created.
  509. | May use quadratic split or R*-tree split.
  510. ----------------------------------------------------------------------*/
  511. void RTreeSplitNode(struct RTree_Node *n, struct RTree_Branch *b,
  512. struct RTree_Node *nn, struct RTree *t)
  513. {
  514. struct RTree_PartitionVars *p;
  515. int level;
  516. /* load all the branches into a buffer, initialize old node */
  517. level = n->level;
  518. RTreeGetBranches(n, b, t);
  519. /* find partition */
  520. p = &Partitions[0];
  521. if (METHOD == 1) /* R* split */
  522. RTreeMethodOne(p, MINFILL(level, t), MAXKIDS(level, t), t);
  523. else
  524. RTreeMethodZero(p, MINFILL(level, t), t);
  525. /*
  526. * put branches from buffer into 2 nodes
  527. * according to chosen partition
  528. */
  529. (nn)->level = n->level = level;
  530. RTreeLoadNodes(n, nn, p, t);
  531. assert(n->count + nn->count == p->total);
  532. }