/* functions to classify sorted arrays of doubles and fill a vector of classbreaks */

#include <grass/glocale.h>
#include <grass/arraystats.h>

int AS_option_to_algorithm(const struct Option * option)
{
    if (G_strcasecmp(option->answer, "int") == 0)
        return CLASS_INTERVAL;
    if (G_strcasecmp(option->answer, "std") == 0)
        return CLASS_STDEV;
    if (G_strcasecmp(option->answer, "qua") == 0)
        return CLASS_QUANT;
    if (G_strcasecmp(option->answer, "equ") == 0)
        return CLASS_EQUIPROB;
    if (G_strcasecmp(option->answer, "dis") == 0)
        return CLASS_DISCONT;

    G_fatal_error(_("Unknown algorithm '%s'"), option->answer);
}
    
double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks,
                                double *classbreaks)
{
    double finfo = 0.0;

    switch (algo) {
    case CLASS_INTERVAL:
        finfo = AS_class_interval(data, nrec, *nbreaks, classbreaks);
        break;
    case CLASS_STDEV:
        finfo = AS_class_stdev(data, nrec, *nbreaks, classbreaks);
        break;
    case CLASS_QUANT:
        finfo = AS_class_quant(data, nrec, *nbreaks, classbreaks);
        break;
    case CLASS_EQUIPROB:
        finfo = AS_class_equiprob(data, nrec, nbreaks, classbreaks);
        break;
    case CLASS_DISCONT:
        /*	finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */
        G_fatal_error(_("Discont algorithm currently not available because of bugs"));
        break;
    default:
        break;
    }

    if (finfo == 0)
	G_fatal_error(_("Classification algorithm failed"));

    return finfo;
}

int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
{
    double min, max;
    double step;
    int i = 0;

    min = data[0];
    max = data[count - 1];

    step = (max - min) / (nbreaks + 1);

    for (i = 0; i < nbreaks; i++)
	classbreaks[i] = min + (step * (i + 1));

    return (1);
}

double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
{
    struct GASTATS stats;
    int i;
    int nbclass;
    double scale = 1.0;

    AS_basic_stats(data, count, &stats);

    nbclass = nbreaks + 1;

    if (nbclass % 2 == 1) {	/* number of classes is uneven so we center middle class on mean */

	/* find appropriate fraction of stdev for step */
	i = 1;
	while (i) {
	    if (((stats.mean + stats.stdev * scale / 2) +
		 (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
		((stats.mean - stats.stdev * scale / 2) -
		 (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
		scale = scale / 2;
	    else
		i = 0;
	}

	/* classbreaks below the mean */
	for (i = 0; i < nbreaks / 2; i++)
	    classbreaks[i] =
		(stats.mean - stats.stdev * scale / 2) -
		stats.stdev * scale * (nbreaks / 2 - (i + 1));
	/* classbreaks above the mean */
	for (; i < nbreaks; i++)
	    classbreaks[i] =
		(stats.mean + stats.stdev * scale / 2) +
		stats.stdev * scale * (i - nbreaks / 2);

    }
    else {		/* number of classes is even so mean is a classbreak */

	/* decide whether to use 1*stdev or 0.5*stdev as step */
	i = 1;
	while (i) {
	    if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
		 stats.max) ||
		((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
		 stats.min))
		scale = scale / 2;
	    else
		i = 0;
	}

	/* classbreaks below the mean and on the mean */
	for (i = 0; i <= nbreaks / 2; i++)
	    classbreaks[i] =
		stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
	/* classbreaks above the mean */
	for (; i < nbreaks; i++)
	    classbreaks[i] =
		stats.mean + stats.stdev * scale * (i - nbreaks / 2);
    }

    return (scale);
}

int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
{
    int i, step;

    step = count / (nbreaks + 1);

    for (i = 0; i < nbreaks; i++)
	classbreaks[i] = data[step * (i + 1)];

    return (1);
}


int AS_class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
{
    int i, j;
    double *lequi;		/*Vector of scale factors for probabilities of the normal distribution */
    struct GASTATS stats;
    int nbclass;

    nbclass = *nbreaks + 1;

    lequi = G_malloc(*nbreaks * sizeof(double));

    /* The following values come from the normal distribution and will be used as:
     * classbreak[i] = (lequi[i] * stdev) + mean;
     */

    if (nbclass < 3) {
	lequi[0] = 0;
    }
    else if (nbclass == 3) {
	lequi[0] = -0.43076;
	lequi[1] = 0.43076;
    }
    else if (nbclass == 4) {
	lequi[0] = -0.6745;
	lequi[1] = 0;
	lequi[2] = 0.6745;
    }
    else if (nbclass == 5) {
	lequi[0] = -0.8416;
	lequi[1] = -0.2533;
	lequi[2] = 0.2533;
	lequi[3] = 0.8416;
    }
    else if (nbclass == 6) {
	lequi[0] = -0.9676;
	lequi[1] = -0.43076;
	lequi[2] = 0;
	lequi[3] = 0.43076;
	lequi[4] = 0.9676;
    }
    else if (nbclass == 7) {
	lequi[0] = -1.068;
	lequi[1] = -0.566;
	lequi[2] = -0.18;
	lequi[3] = 0.18;
	lequi[4] = 0.566;
	lequi[5] = 1.068;
    }
    else if (nbclass == 8) {
	lequi[0] = -1.1507;
	lequi[1] = -0.6745;
	lequi[2] = -0.3187;
	lequi[3] = 0;
	lequi[4] = 0.3187;
	lequi[5] = 0.6745;
	lequi[6] = 1.1507;
    }
    else if (nbclass == 9) {
	lequi[0] = -1.2208;
	lequi[1] = -0.7648;
	lequi[2] = -0.4385;
	lequi[3] = -0.1397;
	lequi[4] = 0.1397;
	lequi[5] = 0.4385;
	lequi[6] = 0.7648;
	lequi[7] = 1.2208;
    }
    else if (nbclass == 10) {
	lequi[0] = -1.28155;
	lequi[1] = -0.84162;
	lequi[2] = -0.5244;
	lequi[3] = -0.25335;
	lequi[4] = 0;
	lequi[5] = 0.25335;
	lequi[6] = 0.5244;
	lequi[7] = 0.84162;
	lequi[8] = 1.28155;
    }
    else {
	G_fatal_error(_("Equiprobable classbreaks currently limited to 10 classes"));
    }

    AS_basic_stats(data, count, &stats);

    /* Check if any of the classbreaks would fall outside of the range min-max */
    j = 0;
    for (i = 0; i < *nbreaks; i++) {
	if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
	    (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
	    j++;
	}
    }

    if (j < (*nbreaks)) {
	G_warning(_("There are classbreaks outside the range min-max. Number of "
		    "classes reduced to %i, but using probabilities for %i classes."),
		  j + 1, *nbreaks + 1);
	G_realloc(classbreaks, j * sizeof(double));
	for (i = 0; i < j; i++)
	    classbreaks[i] = 0;
    }

    j = 0;
    for (i = 0; i < *nbreaks; i++) {
	if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
	    (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
	    classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
	    j++;
	}
    }

    *nbreaks = j;

    G_free(lequi);
    return (1);
}

/* FIXME: there seems to a problem with array overflow, probably due to
   the fact that the code was ported from fortran which has 1-based arrays*/
double AS_class_discont(double *data, int count, int nbreaks,
		     double *classbreaks)
{
    int *num, nbclass;
    double *no, *zz, *nz, *xn, *co;
    double *x;		/* Vector standardized observations */
    int i, j, k;
    double min = 0, max = 0, rangemax = 0;
    int n = 0;
    double rangemin = 0, xlim = 0;
    double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
    int nf = 0, nmax = 0;
    double *abc;
    int nd = 0;
    double den = 0, d = 0;
    int im = 0, ji = 0;
    int tmp = 0;
    int nff = 0, jj = 0, no1 = 0, no2 = 0;
    double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;


    /*get the number of values */
    n = count;

    nbclass = nbreaks + 1;

    num = G_malloc((nbclass + 1) * sizeof(int));
    no = G_malloc((nbclass + 1) * sizeof(double));
    zz = G_malloc((nbclass + 1) * sizeof(double));
    nz = G_malloc(3 * sizeof(double));
    xn = G_malloc((n + 1) * sizeof(double));
    co = G_malloc((nbclass + 1) * sizeof(double));

    /* We copy the array of values to x, in order to be able to standardize it */
    x = G_malloc((n + 1) * sizeof(double));
    x[0] = n;
    xn[0] = 0;

    min = data[0];
    max = data[count - 1];
    for (i = 1; i <= n; i++)
	x[i] = data[i - 1];

    rangemax = max - min;
    rangemin = rangemax;

    for (i = 2; i <= n; i++) {
	if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
	    rangemin = x[i] - x[i - 1];	/* rangemin = minimal distance */
    }

    /* STANDARDIZATION 
     * and creation of the number vector (xn) */

    for (i = 1; i <= n; i++) {
	x[i] = (x[i] - min) / rangemax;
	xn[i] = i / (double)n;
    }
    xlim = rangemin / rangemax;
    rangemin = rangemin / 2.0;

    /* Searching for the limits */
    num[1] = n;
    abc = G_malloc(3 * sizeof(double));

    /*     Loop through possible solutions */
    for (i = 1; i <= nbclass; i++) {
	nmax = 0;
	dmax = 0.0;
	d2 = 0.0;
	nf = 0;			/*End number */

	/*           Loop through classes */
	for (j = 1; j <= i; j++) {
	    nd = nf;		/*Start number */
	    nf = num[j];
	    co[j] = 10e37;
	    AS_eqdrt(x, xn, nd, nf, abc);
	    den = sqrt(pow(abc[1], 2) + 1.0);
	    nd++;
	    /*              Loop through observations */
	    for (k = nd; k <= nf; k++) {
		if (abc[2] == 0.0)
		    d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
		else
		    d = fabs(x[k] - abc[2]);
		d2 += pow(d, 2);
		if (x[k] - x[nd] < xlim)
		    continue;
		if (x[nf] - x[k] < xlim)
		    continue;
		if (d <= dmax)
		    continue;
		dmax = d;
		nmax = k;
	    }
	    nd--;		/* A VERIFIER! */
	    if (x[nf] != x[nd]) {
		if (nd != 0)
		    co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
		else
		    co[j] = (xn[nf]) / (x[nf]);	/* A VERIFIER! */
	    }
	}
	if (i == 1)
	    dd = d2;
	p = d2 / dd;
	for (j = 1; j <= i; j++) {
	    no[j] = num[j];
	    zz[j] = x[num[j]] * rangemax + min;
	    if (j == i)
		continue;
	    if (co[j] > co[j + 1]) {
		zz[j] = zz[j] + rangemin;
		continue;
	    }
	    zz[j] = zz[j] - rangemin;
	    no[j] = no[j] - 1;
	}
	im = i - 1;
	if (im != 0.0) {
	    for (j = 1; j <= im; j++) {
		ji = i + 1 - j;
		no[ji] -= no[ji - 1];
	    }
	}
	if (nmax == 0) {
	    break;
	}
	nff = i + 2;
	tmp = 0;
	for (j = 1; j <= i; j++) {
	    jj = nff - j;
	    if (num[jj - 1] < nmax) {
		num[jj] = nmax;
		tmp = 1;
		break;
	    }
	    num[jj] = num[jj - 1];
	}
	if (tmp == 0) {
	    num[1] = nmax;
	    jj = 1;
	}
	if (jj == 1) {
	    xnj_1 = 0;
	    xj_1 = 0;
	}
	else {
	    xnj_1 = xn[num[jj - 1]];
	    xj_1 = x[num[jj - 1]];
	}
	no1 = (xn[num[jj]] - xnj_1) * n;
	no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
	f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
	f *= n;
	xt1 = (x[num[jj]] - xj_1) * f;
	xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
	if (xt2 == 0) {
	    xt2 = rangemin / 2.0 / rangemax * f;
	    xt1 -= xt2;
	}
	else if (xt1 * xt2 == 0) {
	    xt1 = rangemin / 2.0 / rangemax * f;
	    xt2 -= xt1;
	}

	/* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */
	if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
	    chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);

    }

    /*  Fill up classbreaks of i <=nbclass classes */
    for (j = 0; j <= (i - 1); j++)
	classbreaks[j] = zz[j + 1];

    return (chi2);
}

int AS_class_frequencies(double *data, int count, int nbreaks,
		      double *classbreaks, int *frequencies)
{
    int i, j;
    double min, max;

    min = data[0];
    max = data[count - 1];
    /* count cases in all classes, except for last class */
    i = 0;
    for (j = 0; j < nbreaks; j++) {
	while (data[i] <= classbreaks[j]) {
	    frequencies[j]++;
	    i++;
	}
    }

    /*Now count cases in last class */
    for (; i < count; i++) {
	frequencies[nbreaks]++;
    }

    return (1);
}