1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288 |
- /****************************************************************
- *
- * MODULE: v.cluster
- *
- * AUTHOR(S): Markus Metz
- *
- * PURPOSE: Identifies clusters in a point cloud
- *
- * COPYRIGHT: (C) 2015 by the GRASS Development Team
- *
- * This program is free software under the
- * GNU General Public License (>=v2).
- * Read the file COPYING that comes with GRASS
- * for details.
- *
- ****************************************************************/
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <math.h>
- #include <grass/gis.h>
- #include <grass/vector.h>
- #include <grass/glocale.h>
- #include <grass/kdtree.h>
- #ifdef MAX
- #undef MAX
- #endif
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
- #define CL_DBSCAN 1
- #define CL_DBSCAN2 2
- #define CL_DENSE 3
- #define CL_OPTICS 4
- #define CL_OPTICS2 5
- #define GET_PARENT(p, c) ((p) = (int) (((c) - 2) / 3 + 1))
- #define GET_CHILD(c, p) ((c) = (int) (((p) * 3) - 1))
- struct cl_pnt
- {
- int uid;
- int prevpnt;
- double cd;
- double reach;
- double c[3];
- };
- static struct cl_pnt *clp;
- static int *heapidx;
- static int heapsize;
- int add_pt(int idx);
- int drop_pt(void);
- int main(int argc, char *argv[])
- {
- struct Map_info In, Out;
- struct line_pnts *Points;
- struct line_cats *Cats;
- int i, j, type, cat, is3d;
- struct GModule *module;
- struct Option *input, *output, *lyr_opt;
- struct Option *dist_opt, *min_opt, *method_opt;
- struct Flag *flag_2d, *flag_topo, *flag_attr;
- int clayer;
- int npoints, nlines;
- int *cid, *idx, *renumber, OLD, NEW;
- int nclusters, noutliers;
- struct kdtree *kdt;
- struct kdtrav trav;
- double c[3];
- int uid;
- double eps;
- int ndims, minpnts;
- int clmethod;
- double *kddist;
- int kdfound, *kduid;
- /* initialize GIS environment */
- /* reads grass env, stores program name to G_program_name() */
- G_gisinit(argv[0]);
- /* initialize module */
- module = G_define_module();
- G_add_keyword(_("vector"));
- G_add_keyword(_("point cloud"));
- G_add_keyword(_("cluster"));
- G_add_keyword(_("clump"));
- G_add_keyword(_("level1"));
- module->description = _("Performs cluster identification.");
- /* Define the different options as defined in gis.h */
- input = G_define_standard_option(G_OPT_V_INPUT);
- output = G_define_standard_option(G_OPT_V_OUTPUT);
- lyr_opt = G_define_standard_option(G_OPT_V_FIELD);
- lyr_opt->label = _("Layer number or name for cluster ids");
- lyr_opt->answer = "2";
- dist_opt = G_define_option();
- dist_opt->type = TYPE_DOUBLE;
- dist_opt->key = "distance";
- dist_opt->required = NO;
- dist_opt->label = _("Maximum distance to neighbors");
- min_opt = G_define_option();
- min_opt->type = TYPE_INTEGER;
- min_opt->key = "min";
- min_opt->required = NO;
- min_opt->label = _("Minimum number of points to create a cluster");
- method_opt = G_define_option();
- method_opt->type = TYPE_STRING;
- method_opt->key = "method";
- method_opt->options = "dbscan,dbscan2,density,optics,optics2";
- method_opt->answer = "dbscan";
- method_opt->required = NO;
- method_opt->label = _("Clustering method");
- flag_2d = G_define_flag();
- flag_2d->key = '2';
- flag_2d->label = _("Force 2D clustering");
- flag_topo = G_define_standard_flag(G_FLG_V_TOPO);
- flag_attr = G_define_standard_flag(G_FLG_V_TABLE);
-
- /* options and flags parser */
- if (G_parser(argc, argv))
- exit(EXIT_FAILURE);
- Points = Vect_new_line_struct();
- Cats = Vect_new_cats_struct();
- Vect_check_input_output_name(input->answer, output->answer, G_FATAL_EXIT);
- if (Vect_set_open_level(1))
- G_fatal_error(_("Unable to set predetermined vector open level"));
- if (1 > Vect_open_old(&In, input->answer, NULL))
- G_fatal_error(_("Unable to open vector map <%s>"), input->answer);
- /* Check if old vector is 3D. We should preserve 3D data. */
- is3d = WITHOUT_Z;
- ndims = 2;
- if (Vect_is_3d(&In)) {
- is3d = WITH_Z;
- ndims = 3;
- }
- if (flag_2d->answer)
- ndims = 2;
- minpnts = ndims;
- if (min_opt->answer) {
- minpnts = atoi(min_opt->answer);
- if (minpnts < 2) {
- G_warning(_("Minimum number of points must be at least 2"));
- minpnts = 2;
- }
- minpnts--;
- }
- clayer = atoi(lyr_opt->answer);
- if (clayer < 1)
- G_fatal_error(_("Option %s must be positive"), lyr_opt->key);
- clmethod = CL_DBSCAN;
- if (!strcmp(method_opt->answer, "dbscan2"))
- clmethod = CL_DBSCAN2;
- else if (!strcmp(method_opt->answer, "density"))
- clmethod = CL_DENSE;
- else if (!strcmp(method_opt->answer, "optics"))
- clmethod = CL_OPTICS;
- else if (!strcmp(method_opt->answer, "optic2"))
- clmethod = CL_OPTICS2;
- /* count points */
- G_message(_("Counting input points ..."));
- npoints = nlines = 0;
- while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- nlines++;
- if (type == GV_POINT) {
- if (Vect_cat_get(Cats, clayer, &cat))
- G_fatal_error(_("Layer %d is not empty, choose another layer"),
- clayer);
- npoints++;
- }
- }
- if (npoints < minpnts + 1) {
- G_warning(_("Not enough points in input, nothing to do"));
- Vect_close(&In);
- exit(EXIT_SUCCESS);
- }
- /* Open new vector for reading/writing */
- if (0 > Vect_open_new(&Out, output->answer, is3d)) {
- Vect_close(&In);
- G_fatal_error(_("Unable to create vector map <%s>"), output->answer);
- }
- /* Copy header and history data from old to new map */
- Vect_copy_head_data(&In, &Out);
- Vect_hist_copy(&In, &Out);
- Vect_hist_command(&Out);
- /* create k-d tree */
- G_message(_("Creating search index ..."));
- kdt = kdtree_create(ndims, NULL);
- cid = G_malloc((nlines + 1) * sizeof(int));
- idx = G_malloc((nlines + 1) * sizeof(int));
- Vect_rewind(&In);
- i = 0;
- cid[0] = 0;
- idx[0] = 0;
- while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- G_percent(i++, nlines, 4);
- cid[i] = 0;
- if (type == GV_POINT) {
-
- c[0] = Points->x[0];
- c[1] = Points->y[0];
- c[2] = Points->z[0];
- kdtree_insert(kdt, c, i, 0);
- }
- }
- G_percent(nlines, nlines, 4);
- kdtree_optimize(kdt, 2);
- noutliers = nclusters = 0;
- if (clmethod == CL_DBSCAN) {
- /* DBSCAN
- * the neighbors of each point
- * with at least minpnts neighbors within distance (epsilon)
- * are added to a cluster */
- /* get epsilon */
- if (dist_opt->answer) {
- eps = atof(dist_opt->answer);
- if (eps <= 0)
- G_fatal_error(_("Option %s must be a positive number"), dist_opt->key);
- }
- else {
- int n;
- double dist, mean, min, max, sum, sumsq, sd;
- double *kd;
- int *ki;
- /* estimate epsilon */
- G_message(_("Estimating maximum distance ..."));
- kdtree_init_trav(&trav, kdt);
- c[2] = 0.0;
- n = 0;
- sum = sumsq = 0;
- min = 1.0 / 0.0;
- max = 0;
- kd = G_malloc(minpnts * sizeof(double));
- ki = G_malloc(minpnts * sizeof(int));
- i = 0;
- while (kdtree_traverse(&trav, c, &uid)) {
- G_percent(i++, npoints, 4);
- kdfound = kdtree_knn(kdt, c, ki, kd, minpnts, &uid);
- if (kdfound) {
- dist = sqrt(kd[kdfound - 1]);
- sum += dist;
- sumsq += dist * dist;
- n++;
- if (min > dist)
- min = dist;
- if (max < dist)
- max = dist;
- }
- }
- G_percent(npoints, npoints, 4);
- G_free(kd);
- G_free(ki);
- if (!n)
- G_fatal_error(_("No neighbors found"));
-
- mean = sum / n;
- sd = sqrt(sumsq / n - mean * mean);
- eps = mean + 1.644854 * sd; /* 90% CI */
- eps = mean + 2.575829 * sd; /* 99% CI */
-
- if (eps > max)
- eps = max;
- G_message(_("Distance to the %d nearest neighbor:"), minpnts);
- G_message(_("Min: %g, max: %g"), min, max);
- G_message(_("Mean: %g"), mean);
- G_message(_("Standard deviation: %g"), sd);
- G_message(_("Estimated maximum distance: %g"), eps);
- }
- /* create clusters */
- G_message(_("Building clusters ..."));
- nclusters = 0;
- kdtree_init_trav(&trav, kdt);
- c[2] = 0.0;
- idx[0] = 0;
- i = 0;
- while (kdtree_traverse(&trav, c, &uid)) {
- G_percent(i++, npoints, 4);
- /* radius search */
- /* TODO: use knn search */
- kdfound = kdtree_dnn(kdt, c, &kduid, &kddist, eps, &uid);
- /* must have min neighbors within radius */
- if (kdfound >= minpnts) {
- OLD = cid[uid];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- cat = NEW;
- /* find latest cluster */
- for (j = 0; j < kdfound; j++) {
- OLD = cid[kduid[j]];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- if (cat < NEW) {
- cat = NEW;
- }
- }
- if (cat == 0) {
- /* start new cluster */
- nclusters++;
- cat = nclusters;
- if (nclusters > nlines)
- G_fatal_error(_("nlines: %d, nclusters: %d"), nlines, nclusters);
- idx[nclusters] = nclusters;
- cid[uid] = nclusters;
- }
- /* set or update cluster ids */
- if (cid[uid] != 0) {
- /* relabel */
- OLD = cid[uid];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- idx[NEW] = cat;
- }
- else {
- cid[uid] = cat;
- }
- for (j = 0; j < kdfound; j++) {
- if (cid[kduid[j]] != 0) {
- /* relabel */
- OLD = cid[kduid[j]];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- idx[NEW] = cat;
- }
- else {
- cid[kduid[j]] = cat;
- }
- }
- }
- if (kdfound) {
- G_free(kddist);
- G_free(kduid);
- }
- }
- G_percent(npoints, npoints, 4);
- if (nclusters == 0) {
- G_message(_("No clusters found, adjust option %s"), dist_opt->key);
- Vect_close(&In);
- Vect_close(&Out);
- Vect_delete(output->answer);
- exit(EXIT_SUCCESS);
- }
- /* generate a renumbering scheme */
- G_message(_("Generating renumbering scheme..."));
- G_debug(1, "%d initial clusters", nclusters);
- /* allocate final clump ID */
- renumber = (int *) G_malloc((nclusters + 1) * sizeof(int));
- renumber[0] = 0;
- cat = 1;
- G_percent(0, nclusters, 1);
- for (i = 1; i <= nclusters; i++) {
- G_percent(i, nclusters, 4);
- OLD = i;
- NEW = idx[i];
- if (OLD != NEW) {
- renumber[i] = 0;
- /* find valid clump ID */
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- idx[i] = NEW;
- }
- else
- /* set final clump id */
- renumber[i] = cat++;
- }
- nclusters = cat - 1;
- /* write cluster ids */
- G_message(_("Write out cluster ids ..."));
- Vect_rewind(&In);
- i = 0;
- noutliers = 0;
- while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- G_percent(i++, nlines, 4);
- if (type == GV_POINT) {
- cat = renumber[idx[cid[i]]];
- if (!cat)
- noutliers++;
- Vect_cat_set(Cats, clayer, cat);
- Vect_write_line(&Out, GV_POINT, Points, Cats);
- }
- }
- G_percent(nlines, nlines, 4);
- }
- else if (clmethod == CL_DBSCAN2) {
- /* DBSCAN, but cluster size must be at least minpnts + 1 */
- int *clcnt;
- /* get epsilon */
- if (dist_opt->answer) {
- eps = atof(dist_opt->answer);
- if (eps <= 0)
- G_fatal_error(_("Option %s must be a positive number"), dist_opt->key);
- }
- else {
- int n;
- double dist, mean, min, max, sum, sumsq, sd;
- double *kd;
- int *ki;
- /* estimate epsilon */
- G_message(_("Estimating maximum distance ..."));
- kdtree_init_trav(&trav, kdt);
- c[2] = 0.0;
- n = 0;
- sum = sumsq = 0;
- min = 1.0 / 0.0;
- max = 0;
- kd = G_malloc(minpnts * sizeof(double));
- ki = G_malloc(minpnts * sizeof(int));
- i = 0;
- while (kdtree_traverse(&trav, c, &uid)) {
- G_percent(i++, npoints, 4);
- kdfound = kdtree_knn(kdt, c, ki, kd, minpnts, &uid);
- if (kdfound) {
- dist = sqrt(kd[kdfound - 1]);
- sum += dist;
- sumsq += dist * dist;
- n++;
- if (min > dist)
- min = dist;
- if (max < dist)
- max = dist;
- }
- }
- G_percent(npoints, npoints, 4);
- G_free(kd);
- G_free(ki);
- if (!n)
- G_fatal_error(_("No neighbors found"));
-
- mean = sum / n;
- sd = sqrt(sumsq / n - mean * mean);
- eps = mean + 1.644854 * sd; /* 90% CI */
- eps = mean + 2.575829 * sd; /* 99% CI */
-
- if (eps > max)
- eps = max;
- G_message(_("Distance to the %d nearest neighbor:"), minpnts);
- G_message(_("Min: %g, max: %g"), min, max);
- G_message(_("Mean: %g"), mean);
- G_message(_("Standard deviation: %g"), sd);
- G_message(_("Estimated maximum distance: %g"), eps);
- }
- /* create clusters */
- G_message(_("Building clusters ..."));
- clcnt = G_malloc((nlines + 1) * sizeof(int));
- for (i = 0; i <= nlines; i++)
- clcnt[i] = 0;
- nclusters = 0;
- kdtree_init_trav(&trav, kdt);
- c[2] = 0.0;
- idx[0] = 0;
- i = 0;
- while (kdtree_traverse(&trav, c, &uid)) {
- G_percent(i++, npoints, 4);
- /* radius search */
- /* TODO: use knn search */
- kdfound = kdtree_dnn(kdt, c, &kduid, &kddist, eps, &uid);
-
- /* any neighbor within radius */
- if (kdfound > 0) {
- OLD = cid[uid];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- cat = NEW;
- /* find latest cluster */
- for (j = 0; j < kdfound; j++) {
- OLD = cid[kduid[j]];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- if (cat < NEW) {
- cat = NEW;
- }
- }
- if (cat == 0) {
- /* start new cluster */
- nclusters++;
- cat = nclusters;
- if (nclusters > nlines)
- G_fatal_error(_("nlines: %d, nclusters: %d"), nlines, nclusters);
- idx[nclusters] = nclusters;
- cid[uid] = nclusters;
- clcnt[cat] = 1;
- }
- /* set or update cluster ids */
- if (cid[uid] != 0) {
- /* relabel */
- OLD = cid[uid];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- idx[NEW] = cat;
- }
- else {
- cid[uid] = cat;
- clcnt[cat]++;
- }
- for (j = 0; j < kdfound; j++) {
- if (cid[kduid[j]] != 0) {
- OLD = cid[kduid[j]];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- /* relabel */
- idx[NEW] = cat;
- }
- else {
- cid[kduid[j]] = cat;
- clcnt[cat]++;
- }
- }
- G_free(kddist);
- G_free(kduid);
- }
- }
- G_percent(npoints, npoints, 4);
- if (nclusters == 0) {
- G_message(_("No clusters found, adjust option %s"), dist_opt->key);
- Vect_close(&In);
- Vect_close(&Out);
- Vect_delete(output->answer);
- exit(EXIT_SUCCESS);
- }
- /* generate a renumbering scheme */
- G_message(_("Generating renumbering scheme..."));
- G_debug(1, "%d initial clusters", nclusters);
- /* allocate final clump ID */
- renumber = (int *) G_malloc((nclusters + 1) * sizeof(int));
- renumber[0] = 0;
- cat = 1;
- G_percent(0, nclusters, 1);
- for (i = 1; i <= nclusters; i++) {
- G_percent(i, nclusters, 4);
- OLD = i;
- NEW = idx[i];
- if (OLD != NEW) {
- /* find valid clump ID */
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- idx[i] = NEW;
- clcnt[NEW] += clcnt[i];
- }
- }
- for (i = 1; i <= nclusters; i++) {
- OLD = i;
- NEW = idx[i];
- renumber[i] = 0;
- if (OLD == NEW && clcnt[NEW] > minpnts) {
- /* set final clump id */
- renumber[i] = cat++;
- }
- }
- nclusters = cat - 1;
- /* write cluster ids */
- G_message(_("Write out cluster ids ..."));
- Vect_rewind(&In);
- i = 0;
- noutliers = 0;
- while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- G_percent(i++, nlines, 4);
- if (type == GV_POINT) {
- cat = renumber[idx[cid[i]]];
- if (!cat)
- noutliers++;
- Vect_cat_set(Cats, clayer, cat);
- Vect_write_line(&Out, GV_POINT, Points, Cats);
- }
- }
- G_percent(nlines, nlines, 4);
- }
- else if (clmethod == CL_OPTICS) {
- /* OPTICS
- * each pair of points is either directly connected or
- * connected by a chain of other points
- * for each unprocessed point p
- * mark as processed, append to output list
- * core distance of p: distance to the k-th neighbor
- * q: neighbor of p
- * reachability of q: max(dist(p, q), coredist(p))
- * -> needs epsilon, otherwise always coredist(p)
- * for each unprocessed neighbor q
- * if q has not been reached yet, put q in min heap
- * if q's reachability can be reduced, put q with new reachability in min heap
- * proceed with point with smallest reachability
- * clusters:
- * plot x = position in output list, y = reachability
- * clusters = valleys of reachability in plot
- * hierarchical clusters: valleys in valleys
- */
- double *kd;
- int *ki;
- int k, kdpnts;
- int *clidx;
- int *olist, nout;
- double newrd;
- int isout;
- kd = G_malloc(minpnts * sizeof(double));
- ki = G_malloc(minpnts * sizeof(int));
- clp = G_malloc((npoints + 1) * sizeof(struct cl_pnt));
- heapidx = G_malloc((npoints + 1) * sizeof(int));
- olist = G_malloc((npoints + 1) * sizeof(int));
- clidx = G_malloc((nlines + 1) * sizeof(int));
-
- heapsize = 0;
- /* get epsilon */
- eps = 0;
- if (dist_opt->answer) {
- eps = atof(dist_opt->answer);
- if (eps <= 0)
- G_fatal_error(_("Option %s must be a positive number"), dist_opt->key);
- }
- /* loading points */
- G_message(_("Loading points ..."));
- kdtree_init_trav(&trav, kdt);
- c[2] = 0.0;
- i = 0;
- while (kdtree_traverse(&trav, c, &uid)) {
- G_percent(i, npoints, 4);
-
- clp[i].c[0] = c[0];
- clp[i].c[1] = c[1];
- clp[i].c[2] = c[2];
- clp[i].uid = uid;
- clp[i].cd = -1;
- clp[i].reach = -1;
- clp[i].prevpnt = -1;
- clidx[uid] = i;
- olist[i] = -1;
-
- i++;
- }
- G_percent(npoints, npoints, 4);
- kdpnts = i;
- G_debug(0, "%d points in k-d tree", kdpnts);
- /* reachability network */
- G_message(_("Reachability network ..."));
- nout = 0;
- for (i = 0; i < kdpnts; i++) {
- G_percent(i, kdpnts, 4);
-
- if (clp[i].cd > 0)
- continue;
- /* knn search */
- uid = clp[i].uid;
- kdfound = kdtree_knn(kdt, clp[i].c, ki, kd, minpnts, &uid);
- if (kdfound < minpnts)
- G_fatal_error(_("Not enough points found"));
- clp[i].cd = kd[minpnts - 1];
- /* no reachability for the seed point !!! */
- clp[i].reach = clp[i].cd; /* ok ? */
- olist[nout++] = i;
-
- /* initialize heap */
- newrd = clp[i].cd;
- for (j = 0; j < kdfound; j++) {
- if (clp[clidx[ki[j]]].cd < 0) {
- /* deviation from OPTICS,
- * creates nicer connectivity graph */
- newrd = kd[j];
- if (clp[clidx[ki[j]]].reach < 0 || clp[clidx[ki[j]]].reach > newrd) {
- clp[clidx[ki[j]]].reach = newrd;
- clp[clidx[ki[j]]].prevpnt = i;
- add_pt(clidx[ki[j]]);
- }
- }
- }
- while (heapsize) {
- k = drop_pt();
- if (k < 0 || k >= kdpnts)
- G_fatal_error("Invalid index");
- if (clp[k].cd > 0)
- continue;
- /* knn search */
- uid = clp[k].uid;
- kdfound = kdtree_knn(kdt, clp[k].c, ki, kd, minpnts, &uid);
- if (kdfound < minpnts)
- G_fatal_error(_("Not enough points found"));
- clp[k].cd = kd[minpnts - 1];
- olist[nout++] = k;
- newrd = clp[k].cd;
- for (j = 0; j < kdfound; j++) {
- if (heapsize >= npoints)
- G_fatal_error("Heap is too large");
- if (clp[clidx[ki[j]]].cd < 0) {
- /* deviation from OPTICS,
- * creates nicer connectivity graph */
- newrd = kd[j];
- if (clp[clidx[ki[j]]].reach < 0 || clp[clidx[ki[j]]].reach > newrd) {
- clp[clidx[ki[j]]].reach = newrd;
- clp[clidx[ki[j]]].prevpnt = k;
- add_pt(clidx[ki[j]]);
- }
- }
- }
- }
- }
- G_percent(kdpnts, kdpnts, 4);
- G_debug(0, "nout: %d", nout);
- if (nout != kdpnts)
- G_fatal_error("nout != kdpnts");
- /* set cluster ids */
- G_message(_("Set cluster ids ..."));
- isout = 1;
- nclusters = 0;
- for (i = 0; i < kdpnts; i++) {
- G_percent(i, kdpnts, 4);
- if (eps > 0 && clp[olist[i]].reach > eps)
- isout = 1;
- else {
- if (isout || clp[olist[i]].prevpnt == -1) {
- isout = 0;
- nclusters++;
- }
- cid[clp[olist[i]].uid] = nclusters;
- }
- }
- G_percent(kdpnts, kdpnts, 4);
- /* write cluster ids */
- G_message(_("Write out cluster ids ..."));
- Vect_rewind(&In);
- i = 0;
- noutliers = 0;
- while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- G_percent(i++, nlines, 4);
- if (type == GV_POINT) {
- cat = cid[i];
- if (!cat)
- noutliers++;
- Vect_cat_set(Cats, clayer, cat);
- Vect_write_line(&Out, GV_POINT, Points, Cats);
- }
- }
- G_percent(nlines, nlines, 4);
- }
- else if (clmethod == CL_OPTICS2) {
- /* OPTICS modified, create separated reachability networks
- * for each point p
- * get p's core distance
- * get p's neighbors
- * for each neighbor q
- * new reachability of q: dist(p, q)
- * if q has been processed
- * new reachability of q: max(coredist(q), dist(p, q))
- * set or reduce q's reachability
- * connect q to p if q's reachability can be updated
- */
- double *coredist;
- double *reachability;
- int *nextpnt;
- double *kd;
- int *ki;
- double newrd;
- coredist = G_malloc((nlines + 1) * sizeof(double));
- reachability = G_malloc((nlines + 1) * sizeof(double));
- nextpnt = G_malloc((nlines + 1) * sizeof(int));
- kd = G_malloc(minpnts * sizeof(double));
- ki = G_malloc(minpnts * sizeof(int));
- for (i = 0; i <= nlines; i++) {
- coredist[i] = -1;
- reachability[i] = -1;
- nextpnt[i] = -1;
- cid[i] = 0;
- idx[i] = 0;
- }
- /* reachability network */
- G_message(_("Reachability network ..."));
- kdtree_init_trav(&trav, kdt);
- c[2] = 0.0;
- i = 0;
- while (kdtree_traverse(&trav, c, &uid)) {
- G_percent(i++, npoints, 4);
- /* knn search */
- kdfound = kdtree_knn(kdt, c, ki, kd, minpnts, &uid);
- if (kdfound < minpnts)
- G_fatal_error(_("Not enough points found"));
- coredist[uid] = kd[minpnts - 1];
- for (j = 0; j < kdfound; j++) {
- /* new reachability */
- newrd = kd[j];
- if (coredist[ki[j]] > kd[j]) {
- /* do not connect a point to its own cluster
- * because points in its own cluster
- * have already been connected to this point
- * or reconnected */
- newrd = coredist[ki[j]];
- }
- if (reachability[ki[j]] == -1 || reachability[ki[j]] > newrd) {
- reachability[ki[j]] = newrd;
- nextpnt[ki[j]] = uid;
- /* no link - back link */
- if (nextpnt[uid] == ki[j]) {
- if (coredist[ki[j]] == -1) {
- G_fatal_error(_("Neighbor point's core dist is -1"));
- }
- if (coredist[ki[j]] < coredist[uid]) {
- nextpnt[ki[j]] = -1;
- reachability[ki[j]] = -1;
- nextpnt[uid] = ki[j];
- }
- else {
- nextpnt[uid] = -1;
- reachability[uid] = -1;
- reachability[uid] = -1;
- }
- }
- }
- }
- }
- G_percent(npoints, npoints, 4);
- /* create clusters from reachability network */
- G_message(_("Building clusters ..."));
- G_percent(0, nlines, 4);
- for (i = 1; i <= nlines; i++) {
- G_percent(i, nlines, 4);
- if (cid[i] > 0 || coredist[i] == -1 || nextpnt[i] == -1)
- continue;
- if (cid[nextpnt[i]] > 0) {
- cid[i] = idx[cid[nextpnt[i]]];
- }
- else {
- /* start new cluster */
- nclusters++;
- cid[i] = nclusters;
- idx[nclusters] = nclusters;
- uid = nextpnt[i];
- while (uid > 0) {
- if (cid[uid] == 0) {
- cid[uid] = nclusters;
- uid = nextpnt[uid];
- }
- else {
- /* relabel */
- OLD = cid[uid];
- NEW = idx[OLD];
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- idx[NEW] = nclusters;
- uid = nextpnt[uid];
- uid = -1;
- }
- }
- }
- }
- /* generate a renumbering scheme */
- G_message(_("Generating renumbering scheme..."));
- G_debug(1, "%d initial clusters", nclusters);
- /* allocate final clump ID */
- renumber = (int *) G_malloc((nclusters + 1) * sizeof(int));
- renumber[0] = 0;
- cat = 1;
- G_percent(0, nclusters, 1);
- for (i = 1; i <= nclusters; i++) {
- G_percent(i, nclusters, 4);
- OLD = i;
- NEW = idx[i];
- if (OLD != NEW) {
- renumber[i] = 0;
- /* find valid clump ID */
- while (OLD != NEW) {
- OLD = NEW;
- NEW = idx[OLD];
- }
- idx[i] = NEW;
- }
- else
- /* set final clump id */
- renumber[i] = cat++;
- }
- nclusters = cat - 1;
- /* write cluster ids */
- G_message(_("Write out cluster ids ..."));
- Vect_rewind(&In);
- i = 0;
- noutliers = 0;
- while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- G_percent(i++, nlines, 4);
- if (type == GV_POINT) {
- cat = renumber[idx[cid[i]]];
- if (!cat)
- noutliers++;
- Vect_cat_set(Cats, clayer, cat);
- Vect_write_line(&Out, GV_POINT, Points, Cats);
- }
- }
- G_percent(nlines, nlines, 4);
- }
- else if (clmethod == CL_DENSE) {
- /* MATRUSKA
- * clusters in clusters in clusters ...
- * calculate core density = distance to (minpnts - 1) for each point
- * sort points ascending by core density
- * for each point in sorted list
- * if point does not have a cluster id
- * start new cluster, cluster reachability is core density of this point
- * connect all points within cluster reachability
- * add all connected points to list
- * while list is not empty
- * remove last point from list
- * connect all points within cluster reachability
- * add all connected points to list
- * */
- int *clidx;
- double *kd;
- int *ki;
- double cd;
- int k, kdcount;
- struct ilist *CList;
- clp = G_malloc((nlines + 1) * sizeof(struct cl_pnt));
- clidx = G_malloc((nlines + 1) * sizeof(int));
- kd = G_malloc(minpnts * sizeof(double));
- ki = G_malloc(minpnts * sizeof(int));
- CList = G_new_ilist();
- for (i = 0; i <= nlines; i++) {
- clp[i].c[0] = 0;
- clp[i].c[1] = 0;
- clp[i].c[2] = 0;
- clp[i].cd = -1;
- clp[i].uid = -1;
- clidx[i] = -1;
- }
- /* core density */
- G_message(_("Core density ..."));
- kdtree_init_trav(&trav, kdt);
- c[2] = 0.0;
- i = 0;
- kdcount = 0;
- uid = -1;
- while (kdtree_traverse(&trav, c, &uid)) {
- G_percent(i++, npoints, 4);
- /* knn search */
- kdfound = kdtree_knn(kdt, c, ki, kd, minpnts, &uid);
- if (kdfound < minpnts)
- G_fatal_error(_("Not enough points found"));
- cd = kd[minpnts - 1];
- /* list insert */
- for (j = kdcount; j > 0; j--) {
- if (clp[j - 1].cd <= cd)
- break;
- clp[j] = clp[j - 1];
- clidx[clp[j].uid] = j;
- }
- clp[j].uid = uid;
- clp[j].c[0] = c[0];
- clp[j].c[1] = c[1];
- clp[j].c[2] = c[2];
- clp[j].cd = cd;
- clidx[clp[j].uid] = j;
- kdcount++;
- }
- G_percent(npoints, npoints, 4);
- /* create clusters */
- G_message(_("Building clusters ..."));
- nclusters = 0;
- for (i = 0; i < kdcount; i++) {
- G_percent(i, kdcount, 4);
- if (cid[clp[i].uid] > 0)
- continue;
- /* knn search */
- kdfound = kdtree_knn(kdt, clp[i].c, ki, kd, minpnts, &clp[i].uid);
- if (kdfound < minpnts)
- G_fatal_error(_("Not enough points found"));
- /* start a new cluster */
- uid = clp[i].uid;
- nclusters++;
- cat = nclusters;
- cid[uid] = cat;
- cd = clp[i].cd;
- CList->n_values = 0;
- for (j = 0; j < kdfound; j++) {
- if (cid[ki[j]] == 0) {
- G_ilist_add(CList, clidx[ki[j]]);
- cid[ki[j]] = cat;
- }
- }
- if (CList->n_values < minpnts) {
- CList->n_values = 0;
- nclusters--;
- cid[uid] = 0;
- for (j = 0; j < kdfound; j++) {
- if (cid[ki[j]] == cat) {
- cid[ki[j]] = 0;
- }
- }
- }
- while (CList->n_values) {
- /* expand cluster */
- CList->n_values--;
- k = CList->value[CList->n_values];
- if (k < 0)
- G_fatal_error("expand cluster: k < 0");
- if (clp[k].uid < 1)
- G_fatal_error("expand cluster: clp[k].uid < 1");
- kdfound = kdtree_knn(kdt, clp[k].c, ki, kd, minpnts, &clp[k].uid);
- if (kdfound < minpnts)
- G_fatal_error(_("Not enough points found"));
- for (j = 0; j < kdfound; j++) {
- if (kd[j] <= cd && cid[ki[j]] == 0) {
- cid[ki[j]] = cat;
- if (clidx[ki[j]] < 0)
- G_fatal_error("expand cluster ngbrs: clidx[ki[j]] < 0");
- G_ilist_add(CList, clidx[ki[j]]);
- }
- }
- }
- }
- G_percent(kdcount, kdcount, 4);
- /* write cluster ids */
- G_message(_("Write out cluster ids ..."));
- Vect_rewind(&In);
- i = 0;
- noutliers = 0;
- while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- G_percent(i++, nlines, 4);
- if (type == GV_POINT) {
- cat = cid[i];
- if (!cat)
- noutliers++;
- Vect_cat_set(Cats, clayer, cat);
- Vect_write_line(&Out, GV_POINT, Points, Cats);
- }
- }
- G_percent(nlines, nlines, 4);
- }
- if (!flag_attr->answer)
- Vect_copy_tables(&In, &Out, 0);
- /* Build topology for vector map and close them */
- Vect_close(&In);
- if (!flag_topo->answer)
- Vect_build(&Out);
- Vect_close(&Out);
- G_message(_("%d clusters found"), nclusters);
- G_message(_("%d outliers found"), noutliers);
- exit(EXIT_SUCCESS);
- }
- /* min heap */
- /* compare heap points */
- int cmp_pnt(int a, int b)
- {
- if (clp[a].reach < clp[b].reach)
- return 1;
- if (clp[a].reach > clp[b].reach)
- return 0;
- if (clp[a].uid < clp[b].uid)
- return 1;
- return 0;
- }
- /* standard sift-up routine for d-ary min heap */
- static int sift_up(int start)
- {
- register int parent, child, child_added;
- child = start;
- child_added = heapidx[child];
- while (child > 1) {
- GET_PARENT(parent, child);
- /* child smaller */
- if (cmp_pnt(child_added, heapidx[parent])) {
- /* push parent point down */
- heapidx[child] = heapidx[parent];
- child = parent;
- }
- else
- /* no more sifting up, found new slot for child */
- break;
- }
- /* put point in new slot */
- if (child < start) {
- heapidx[child] = child_added;
- }
- return 0;
- }
- /* add point routine for min heap */
- int add_pt(int idx)
- {
- /* add point to next free position */
- heapsize++;
- heapidx[heapsize] = idx;
- /* sift up: move new point towards top of heap */
- sift_up(heapsize);
- return 0;
- }
- /* drop point routine for min heap */
- int drop_pt(void)
- {
- register int child, childr, parent;
- register int i, idx;
- idx = heapidx[1];
- if (heapsize == 1) {
- heapidx[1] = -1;
- heapsize = 0;
- return idx;
- }
- /* start with root */
- parent = 1;
- /* sift down: move hole back towards bottom of heap */
- while (GET_CHILD(child, parent) <= heapsize) {
- i = child + 3;
- for (childr = child + 1; childr <= heapsize && childr < i; childr++) {
- if (cmp_pnt(heapidx[childr], heapidx[child])) {
- child = childr;
- }
- }
- /* move hole down */
- heapidx[parent] = heapidx[child];
- parent = child;
- }
- /* hole is in lowest layer, move to heap end */
- if (parent < heapsize) {
- heapidx[parent] = heapidx[heapsize];
- /* sift up last swapped point, only necessary if hole moved to heap end */
- sift_up(parent);
- }
- /* the actual drop */
- heapsize--;
- return idx;
- }
|