GRASS Programmer's Manual  6.4.2(2012)
class.c
Go to the documentation of this file.
00001 /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
00002 
00003 #include <grass/glocale.h>
00004 #include <grass/arraystats.h>
00005 
00006 double class_apply_algorithm(char *algo, double *data, int nrec, int *nbreaks,
00007                              double *classbreaks)
00008 {
00009     double finfo = 0.0;
00010 
00011     if (G_strcasecmp(algo, "int") == 0)
00012         finfo = class_interval(data, nrec, *nbreaks, classbreaks);
00013     else if (G_strcasecmp(algo, "std") == 0)
00014         finfo = class_stdev(data, nrec, *nbreaks, classbreaks);
00015     else if (G_strcasecmp(algo, "qua") == 0)
00016         finfo = class_quant(data, nrec, *nbreaks, classbreaks);
00017     else if (G_strcasecmp(algo, "equ") == 0)
00018         finfo = class_equiprob(data, nrec, nbreaks, classbreaks);
00019     else if (G_strcasecmp(algo, "dis") == 0)
00020             /*  finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */
00021         G_fatal_error(_("Discont algorithm currently not available because of bugs"));
00022     else
00023         G_fatal_error(_("%s: Unknown algorithm"), algo);
00024 
00025     if (finfo == 0)
00026         G_fatal_error(_("%s: Error in classification algorithm"), algo);
00027 
00028     return finfo;
00029 }
00030 
00031 int class_interval(double *data, int count, int nbreaks, double *classbreaks)
00032 {
00033     double min, max;
00034     double step;
00035     int i = 0;
00036 
00037     min = data[0];
00038     max = data[count - 1];
00039 
00040     step = (max - min) / (nbreaks + 1);
00041 
00042     for (i = 0; i < nbreaks; i++)
00043         classbreaks[i] = min + (step * (i + 1));
00044 
00045     return (1);
00046 }
00047 
00048 double class_stdev(double *data, int count, int nbreaks, double *classbreaks)
00049 {
00050     struct GASTATS stats;
00051     int i;
00052     int nbclass;
00053     double scale = 1.0;
00054 
00055     basic_stats(data, count, &stats);
00056 
00057     nbclass = nbreaks + 1;
00058 
00059     if (nbclass % 2 == 1) {     /* number of classes is uneven so we center middle class on mean */
00060 
00061         /* find appropriate fraction of stdev for step */
00062         i = 1;
00063         while (i) {
00064             if (((stats.mean + stats.stdev * scale / 2) +
00065                  (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
00066                 ((stats.mean - stats.stdev * scale / 2) -
00067                  (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
00068                 scale = scale / 2;
00069             else
00070                 i = 0;
00071         }
00072 
00073         /* classbreaks below the mean */
00074         for (i = 0; i < nbreaks / 2; i++)
00075             classbreaks[i] =
00076                 (stats.mean - stats.stdev * scale / 2) -
00077                 stats.stdev * scale * (nbreaks / 2 - (i + 1));
00078         /* classbreaks above the mean */
00079         for (i = i; i < nbreaks; i++)
00080             classbreaks[i] =
00081                 (stats.mean + stats.stdev * scale / 2) +
00082                 stats.stdev * scale * (i - nbreaks / 2);
00083 
00084     }
00085     else {                      /* number of classes is even so mean is a classbreak */
00086 
00087         /* decide whether to use 1*stdev or 0.5*stdev as step */
00088         i = 1;
00089         while (i) {
00090             if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
00091                  stats.max) ||
00092                 ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
00093                  stats.min))
00094                 scale = scale / 2;
00095             else
00096                 i = 0;
00097         }
00098 
00099         /* classbreaks below the mean and on the mean */
00100         for (i = 0; i <= nbreaks / 2; i++)
00101             classbreaks[i] =
00102                 stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
00103         /* classbreaks above the mean */
00104         for (i = i; i < nbreaks; i++)
00105             classbreaks[i] =
00106                 stats.mean + stats.stdev * scale * (i - nbreaks / 2);
00107     }
00108 
00109     return (scale);
00110 }
00111 
00112 int class_quant(double *data, int count, int nbreaks, double *classbreaks)
00113 {
00114     int i, step;
00115 
00116     step = count / (nbreaks + 1);
00117 
00118     for (i = 0; i < nbreaks; i++)
00119         classbreaks[i] = data[step * (i + 1)];
00120 
00121     return (1);
00122 }
00123 
00124 
00125 int class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
00126 {
00127     int i, j;
00128     double *lequi;              /*Vector of scale factors for probabilities of the normal distribution */
00129     struct GASTATS stats;
00130     int nbclass;
00131 
00132     nbclass = *nbreaks + 1;
00133 
00134     lequi = G_malloc(*nbreaks * sizeof(double));
00135 
00136     /*The following values come from the normal distribution and 
00137      * will be used as: 
00138      * classbreak[i] = (lequi[i] * stdev) + mean;
00139      */
00140 
00141     if (nbclass < 3) {
00142         lequi[0] = 0;
00143     }
00144     else if (nbclass == 3) {
00145         lequi[0] = -0.43076;
00146         lequi[1] = 0.43076;
00147     }
00148     else if (nbclass == 4) {
00149         lequi[0] = -0.6745;
00150         lequi[1] = 0;
00151         lequi[2] = 0.6745;
00152     }
00153     else if (nbclass == 5) {
00154         lequi[0] = -0.8416;
00155         lequi[1] = -0.2533;
00156         lequi[2] = 0.2533;
00157         lequi[3] = 0.8416;
00158     }
00159     else if (nbclass == 6) {
00160         lequi[0] = -0.9676;
00161         lequi[1] = -0.43076;
00162         lequi[2] = 0;
00163         lequi[3] = 0.43076;
00164         lequi[4] = 0.9676;
00165     }
00166     else if (nbclass == 7) {
00167         lequi[0] = -1.068;
00168         lequi[1] = -0.566;
00169         lequi[2] = -0.18;
00170         lequi[3] = 0.18;
00171         lequi[4] = 0.566;
00172         lequi[5] = 1.068;
00173     }
00174     else if (nbclass == 8) {
00175         lequi[0] = -1.1507;
00176         lequi[1] = -0.6745;
00177         lequi[2] = -0.3187;
00178         lequi[3] = 0;
00179         lequi[4] = 0.3187;
00180         lequi[5] = 0.6745;
00181         lequi[6] = 1.1507;
00182     }
00183     else if (nbclass == 9) {
00184         lequi[0] = -1.2208;
00185         lequi[1] = -0.7648;
00186         lequi[2] = -0.4385;
00187         lequi[3] = -0.1397;
00188         lequi[4] = 0.1397;
00189         lequi[5] = 0.4385;
00190         lequi[6] = 0.7648;
00191         lequi[7] = 1.2208;
00192     }
00193     else if (nbclass == 10) {
00194         lequi[0] = -1.28155;
00195         lequi[1] = -0.84162;
00196         lequi[2] = -0.5244;
00197         lequi[3] = -0.25335;
00198         lequi[4] = 0;
00199         lequi[5] = 0.25335;
00200         lequi[6] = 0.5244;
00201         lequi[7] = 0.84162;
00202         lequi[8] = 1.28155;
00203     }
00204     else {
00205         G_fatal_error
00206             ("Equiprobable classbreaks currently limited to 10 classes");
00207     }
00208 
00209     basic_stats(data, count, &stats);
00210 
00211     /*check if any of the classbreaks would fall outside of the range min-max */
00212     j = 0;
00213     for (i = 0; i < *nbreaks; i++) {
00214         if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
00215             (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
00216             j++;
00217         }
00218     }
00219 
00220     if (j < (*nbreaks)) {
00221         G_warning(_("There are classbreaks outside the range min-max. Number of classes reduced to %i, but using probabilities for %i classes."),
00222                   j + 1, *nbreaks + 1);
00223         G_realloc(classbreaks, j * sizeof(double));
00224         for (i = 0; i < j; i++)
00225             classbreaks[i] = 0;
00226     }
00227 
00228     j = 0;
00229     for (i = 0; i < *nbreaks; i++) {
00230         if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
00231             (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
00232             classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
00233             j++;
00234         }
00235     }
00236 
00237     *nbreaks = j;
00238 
00239     G_free(lequi);
00240     return (1);
00241 }
00242 
00243 /* FixMe: there seems to a problem with array overflow, probably due to the fact that the code was ported from fortran which has 1-based arrays*/
00244 double class_discont(double *data, int count, int nbreaks,
00245                      double *classbreaks)
00246 {
00247     int *num, nbclass;
00248     double *no, *zz, *nz, *xn, *co;
00249     double *x;                  //Vecteur des observations standardisées
00250     int i, j, k;
00251     double min = 0, max = 0, rangemax = 0;
00252     int n = 0;
00253     double rangemin = 0, xlim = 0;
00254     double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
00255     int nf = 0, nmax = 0;
00256     double *abc;
00257     int nd = 0;
00258     double den = 0, d = 0;
00259     int im = 0, ji = 0;
00260     int tmp = 0;
00261     int nff = 0, jj = 0, no1 = 0, no2 = 0;
00262     double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
00263 
00264 
00265     /*get the number of values */
00266     n = count;
00267 
00268     nbclass = nbreaks + 1;
00269 
00270     num = G_malloc((nbclass + 1) * sizeof(int));
00271     no = G_malloc((nbclass + 1) * sizeof(double));
00272     zz = G_malloc((nbclass + 1) * sizeof(double));
00273     nz = G_malloc(3 * sizeof(double));
00274     xn = G_malloc((n + 1) * sizeof(double));
00275     co = G_malloc((nbclass + 1) * sizeof(double));
00276 
00277     /* We copy the array of values to x, in order to be able to standardize it */
00278     x = G_malloc((n + 1) * sizeof(double));
00279     x[0] = n;
00280     xn[0] = 0;
00281 
00282     min = data[0];
00283     max = data[count - 1];
00284     for (i = 1; i <= n; i++)
00285         x[i] = data[i - 1];
00286 
00287     rangemax = max - min;
00288     rangemin = rangemax;
00289 
00290     for (i = 2; i <= n; i++) {
00291         if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
00292             rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
00293     }
00294 
00295     /* STANDARDIZATION 
00296      * and creation of the number vector (xn) */
00297 
00298     for (i = 1; i <= n; i++) {
00299         x[i] = (x[i] - min) / rangemax;
00300         xn[i] = i / (double)n;
00301     }
00302     xlim = rangemin / rangemax;
00303     rangemin = rangemin / 2.0;
00304 
00305     /* Searching for the limits */
00306     num[1] = n;
00307     abc = G_malloc(3 * sizeof(double));
00308 
00309     /*     Loop through possible solutions */
00310     for (i = 1; i <= nbclass; i++) {
00311         nmax = 0;
00312         dmax = 0.0;
00313         d2 = 0.0;
00314         nf = 0;                 /*End number */
00315 
00316         /*           Loop through classes */
00317         for (j = 1; j <= i; j++) {
00318             nd = nf;            /*Start number */
00319             nf = num[j];
00320             co[j] = 10e37;
00321             eqdrt(x, xn, nd, nf, abc);
00322             den = sqrt(pow(abc[1], 2) + 1.0);
00323             nd++;
00324             /*              Loop through observations */
00325             for (k = nd; k <= nf; k++) {
00326                 if (abc[2] == 0.0)
00327                     d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
00328                 else
00329                     d = fabs(x[k] - abc[2]);
00330                 d2 += pow(d, 2);
00331                 if (x[k] - x[nd] < xlim)
00332                     continue;
00333                 if (x[nf] - x[k] < xlim)
00334                     continue;
00335                 if (d <= dmax)
00336                     continue;
00337                 dmax = d;
00338                 nmax = k;
00339             }
00340             nd--;               //A VERIFIER!
00341             if (x[nf] != x[nd]) {
00342                 if (nd != 0)
00343                     co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
00344                 else
00345                     co[j] = (xn[nf]) / (x[nf]); //A VERIFIER!
00346             }
00347         }
00348         if (i == 1)
00349             dd = d2;
00350         p = d2 / dd;
00351         for (j = 1; j <= i; j++) {
00352             no[j] = num[j];
00353             zz[j] = x[num[j]] * rangemax + min;
00354             if (j == i)
00355                 continue;
00356             if (co[j] > co[j + 1]) {
00357                 zz[j] = zz[j] + rangemin;
00358                 continue;
00359             }
00360             zz[j] = zz[j] - rangemin;
00361             no[j] = no[j] - 1;
00362         }
00363         im = i - 1;
00364         if (im != 0.0) {
00365             for (j = 1; j <= im; j++) {
00366                 ji = i + 1 - j;
00367                 no[ji] -= no[ji - 1];
00368             }
00369         }
00370         if (nmax == 0) {
00371             break;
00372         }
00373         nff = i + 2;
00374         tmp = 0;
00375         for (j = 1; j <= i; j++) {
00376             jj = nff - j;
00377             if (num[jj - 1] < nmax) {
00378                 num[jj] = nmax;
00379                 tmp = 1;
00380                 break;
00381             }
00382             num[jj] = num[jj - 1];
00383         }
00384         if (tmp == 0) {
00385             num[1] = nmax;
00386             jj = 1;
00387         }
00388         if (jj == 1) {
00389             xnj_1 = 0;
00390             xj_1 = 0;
00391         }
00392         else {
00393             xnj_1 = xn[num[jj - 1]];
00394             xj_1 = x[num[jj - 1]];
00395         }
00396         no1 = (xn[num[jj]] - xnj_1) * n;
00397         no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
00398         f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
00399         f *= n;
00400         xt1 = (x[num[jj]] - xj_1) * f;
00401         xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
00402         if (xt2 == 0) {
00403             xt2 = rangemin / 2.0 / rangemax * f;
00404             xt1 -= xt2;
00405         }
00406         else if (xt1 * xt2 == 0) {
00407             xt1 = rangemin / 2.0 / rangemax * f;
00408             xt2 -= xt1;
00409         }
00410 
00411         /* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */
00412         if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
00413             chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
00414 
00415     }
00416 
00417     /*  Fill up classbreaks of i <=nbclass classes */
00418     for (j = 0; j <= (i - 1); j++)
00419         classbreaks[j] = zz[j + 1];
00420 
00421     return (chi2);
00422 }
00423 
00424 int class_frequencies(double *data, int count, int nbreaks,
00425                       double *classbreaks, int *frequencies)
00426 {
00427     int i, j;
00428     double min, max;
00429 
00430     min = data[0];
00431     max = data[count - 1];
00432     /* count cases in all classes, except for last class */
00433     i = 0;
00434     for (j = 0; j < nbreaks; j++) {
00435         while (data[i] <= classbreaks[j]) {
00436             frequencies[j]++;
00437             i++;
00438         }
00439     }
00440 
00441     /*Now count cases in last class */
00442     for (i = i; i < count; i++) {
00443         frequencies[nbreaks]++;
00444     }
00445 
00446     return (1);
00447 }
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines