GRASS Programmer's Manual
6.4.2(2012)
|
00001 /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */ 00002 00003 #include <grass/glocale.h> 00004 #include <grass/arraystats.h> 00005 00006 double class_apply_algorithm(char *algo, double *data, int nrec, int *nbreaks, 00007 double *classbreaks) 00008 { 00009 double finfo = 0.0; 00010 00011 if (G_strcasecmp(algo, "int") == 0) 00012 finfo = class_interval(data, nrec, *nbreaks, classbreaks); 00013 else if (G_strcasecmp(algo, "std") == 0) 00014 finfo = class_stdev(data, nrec, *nbreaks, classbreaks); 00015 else if (G_strcasecmp(algo, "qua") == 0) 00016 finfo = class_quant(data, nrec, *nbreaks, classbreaks); 00017 else if (G_strcasecmp(algo, "equ") == 0) 00018 finfo = class_equiprob(data, nrec, nbreaks, classbreaks); 00019 else if (G_strcasecmp(algo, "dis") == 0) 00020 /* finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */ 00021 G_fatal_error(_("Discont algorithm currently not available because of bugs")); 00022 else 00023 G_fatal_error(_("%s: Unknown algorithm"), algo); 00024 00025 if (finfo == 0) 00026 G_fatal_error(_("%s: Error in classification algorithm"), algo); 00027 00028 return finfo; 00029 } 00030 00031 int class_interval(double *data, int count, int nbreaks, double *classbreaks) 00032 { 00033 double min, max; 00034 double step; 00035 int i = 0; 00036 00037 min = data[0]; 00038 max = data[count - 1]; 00039 00040 step = (max - min) / (nbreaks + 1); 00041 00042 for (i = 0; i < nbreaks; i++) 00043 classbreaks[i] = min + (step * (i + 1)); 00044 00045 return (1); 00046 } 00047 00048 double class_stdev(double *data, int count, int nbreaks, double *classbreaks) 00049 { 00050 struct GASTATS stats; 00051 int i; 00052 int nbclass; 00053 double scale = 1.0; 00054 00055 basic_stats(data, count, &stats); 00056 00057 nbclass = nbreaks + 1; 00058 00059 if (nbclass % 2 == 1) { /* number of classes is uneven so we center middle class on mean */ 00060 00061 /* find appropriate fraction of stdev for step */ 00062 i = 1; 00063 while (i) { 00064 if (((stats.mean + stats.stdev * scale / 2) + 00065 (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) || 00066 ((stats.mean - stats.stdev * scale / 2) - 00067 (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min)) 00068 scale = scale / 2; 00069 else 00070 i = 0; 00071 } 00072 00073 /* classbreaks below the mean */ 00074 for (i = 0; i < nbreaks / 2; i++) 00075 classbreaks[i] = 00076 (stats.mean - stats.stdev * scale / 2) - 00077 stats.stdev * scale * (nbreaks / 2 - (i + 1)); 00078 /* classbreaks above the mean */ 00079 for (i = i; i < nbreaks; i++) 00080 classbreaks[i] = 00081 (stats.mean + stats.stdev * scale / 2) + 00082 stats.stdev * scale * (i - nbreaks / 2); 00083 00084 } 00085 else { /* number of classes is even so mean is a classbreak */ 00086 00087 /* decide whether to use 1*stdev or 0.5*stdev as step */ 00088 i = 1; 00089 while (i) { 00090 if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) > 00091 stats.max) || 00092 ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) < 00093 stats.min)) 00094 scale = scale / 2; 00095 else 00096 i = 0; 00097 } 00098 00099 /* classbreaks below the mean and on the mean */ 00100 for (i = 0; i <= nbreaks / 2; i++) 00101 classbreaks[i] = 00102 stats.mean - stats.stdev * scale * (nbreaks / 2 - i); 00103 /* classbreaks above the mean */ 00104 for (i = i; i < nbreaks; i++) 00105 classbreaks[i] = 00106 stats.mean + stats.stdev * scale * (i - nbreaks / 2); 00107 } 00108 00109 return (scale); 00110 } 00111 00112 int class_quant(double *data, int count, int nbreaks, double *classbreaks) 00113 { 00114 int i, step; 00115 00116 step = count / (nbreaks + 1); 00117 00118 for (i = 0; i < nbreaks; i++) 00119 classbreaks[i] = data[step * (i + 1)]; 00120 00121 return (1); 00122 } 00123 00124 00125 int class_equiprob(double *data, int count, int *nbreaks, double *classbreaks) 00126 { 00127 int i, j; 00128 double *lequi; /*Vector of scale factors for probabilities of the normal distribution */ 00129 struct GASTATS stats; 00130 int nbclass; 00131 00132 nbclass = *nbreaks + 1; 00133 00134 lequi = G_malloc(*nbreaks * sizeof(double)); 00135 00136 /*The following values come from the normal distribution and 00137 * will be used as: 00138 * classbreak[i] = (lequi[i] * stdev) + mean; 00139 */ 00140 00141 if (nbclass < 3) { 00142 lequi[0] = 0; 00143 } 00144 else if (nbclass == 3) { 00145 lequi[0] = -0.43076; 00146 lequi[1] = 0.43076; 00147 } 00148 else if (nbclass == 4) { 00149 lequi[0] = -0.6745; 00150 lequi[1] = 0; 00151 lequi[2] = 0.6745; 00152 } 00153 else if (nbclass == 5) { 00154 lequi[0] = -0.8416; 00155 lequi[1] = -0.2533; 00156 lequi[2] = 0.2533; 00157 lequi[3] = 0.8416; 00158 } 00159 else if (nbclass == 6) { 00160 lequi[0] = -0.9676; 00161 lequi[1] = -0.43076; 00162 lequi[2] = 0; 00163 lequi[3] = 0.43076; 00164 lequi[4] = 0.9676; 00165 } 00166 else if (nbclass == 7) { 00167 lequi[0] = -1.068; 00168 lequi[1] = -0.566; 00169 lequi[2] = -0.18; 00170 lequi[3] = 0.18; 00171 lequi[4] = 0.566; 00172 lequi[5] = 1.068; 00173 } 00174 else if (nbclass == 8) { 00175 lequi[0] = -1.1507; 00176 lequi[1] = -0.6745; 00177 lequi[2] = -0.3187; 00178 lequi[3] = 0; 00179 lequi[4] = 0.3187; 00180 lequi[5] = 0.6745; 00181 lequi[6] = 1.1507; 00182 } 00183 else if (nbclass == 9) { 00184 lequi[0] = -1.2208; 00185 lequi[1] = -0.7648; 00186 lequi[2] = -0.4385; 00187 lequi[3] = -0.1397; 00188 lequi[4] = 0.1397; 00189 lequi[5] = 0.4385; 00190 lequi[6] = 0.7648; 00191 lequi[7] = 1.2208; 00192 } 00193 else if (nbclass == 10) { 00194 lequi[0] = -1.28155; 00195 lequi[1] = -0.84162; 00196 lequi[2] = -0.5244; 00197 lequi[3] = -0.25335; 00198 lequi[4] = 0; 00199 lequi[5] = 0.25335; 00200 lequi[6] = 0.5244; 00201 lequi[7] = 0.84162; 00202 lequi[8] = 1.28155; 00203 } 00204 else { 00205 G_fatal_error 00206 ("Equiprobable classbreaks currently limited to 10 classes"); 00207 } 00208 00209 basic_stats(data, count, &stats); 00210 00211 /*check if any of the classbreaks would fall outside of the range min-max */ 00212 j = 0; 00213 for (i = 0; i < *nbreaks; i++) { 00214 if ((lequi[i] * stats.stdev + stats.mean) >= stats.min && 00215 (lequi[i] * stats.stdev) + stats.mean <= stats.max) { 00216 j++; 00217 } 00218 } 00219 00220 if (j < (*nbreaks)) { 00221 G_warning(_("There are classbreaks outside the range min-max. Number of classes reduced to %i, but using probabilities for %i classes."), 00222 j + 1, *nbreaks + 1); 00223 G_realloc(classbreaks, j * sizeof(double)); 00224 for (i = 0; i < j; i++) 00225 classbreaks[i] = 0; 00226 } 00227 00228 j = 0; 00229 for (i = 0; i < *nbreaks; i++) { 00230 if ((lequi[i] * stats.stdev + stats.mean) >= stats.min && 00231 (lequi[i] * stats.stdev) + stats.mean <= stats.max) { 00232 classbreaks[j] = lequi[i] * stats.stdev + stats.mean; 00233 j++; 00234 } 00235 } 00236 00237 *nbreaks = j; 00238 00239 G_free(lequi); 00240 return (1); 00241 } 00242 00243 /* FixMe: there seems to a problem with array overflow, probably due to the fact that the code was ported from fortran which has 1-based arrays*/ 00244 double class_discont(double *data, int count, int nbreaks, 00245 double *classbreaks) 00246 { 00247 int *num, nbclass; 00248 double *no, *zz, *nz, *xn, *co; 00249 double *x; //Vecteur des observations standardisées 00250 int i, j, k; 00251 double min = 0, max = 0, rangemax = 0; 00252 int n = 0; 00253 double rangemin = 0, xlim = 0; 00254 double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0; 00255 int nf = 0, nmax = 0; 00256 double *abc; 00257 int nd = 0; 00258 double den = 0, d = 0; 00259 int im = 0, ji = 0; 00260 int tmp = 0; 00261 int nff = 0, jj = 0, no1 = 0, no2 = 0; 00262 double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0; 00263 00264 00265 /*get the number of values */ 00266 n = count; 00267 00268 nbclass = nbreaks + 1; 00269 00270 num = G_malloc((nbclass + 1) * sizeof(int)); 00271 no = G_malloc((nbclass + 1) * sizeof(double)); 00272 zz = G_malloc((nbclass + 1) * sizeof(double)); 00273 nz = G_malloc(3 * sizeof(double)); 00274 xn = G_malloc((n + 1) * sizeof(double)); 00275 co = G_malloc((nbclass + 1) * sizeof(double)); 00276 00277 /* We copy the array of values to x, in order to be able to standardize it */ 00278 x = G_malloc((n + 1) * sizeof(double)); 00279 x[0] = n; 00280 xn[0] = 0; 00281 00282 min = data[0]; 00283 max = data[count - 1]; 00284 for (i = 1; i <= n; i++) 00285 x[i] = data[i - 1]; 00286 00287 rangemax = max - min; 00288 rangemin = rangemax; 00289 00290 for (i = 2; i <= n; i++) { 00291 if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin) 00292 rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */ 00293 } 00294 00295 /* STANDARDIZATION 00296 * and creation of the number vector (xn) */ 00297 00298 for (i = 1; i <= n; i++) { 00299 x[i] = (x[i] - min) / rangemax; 00300 xn[i] = i / (double)n; 00301 } 00302 xlim = rangemin / rangemax; 00303 rangemin = rangemin / 2.0; 00304 00305 /* Searching for the limits */ 00306 num[1] = n; 00307 abc = G_malloc(3 * sizeof(double)); 00308 00309 /* Loop through possible solutions */ 00310 for (i = 1; i <= nbclass; i++) { 00311 nmax = 0; 00312 dmax = 0.0; 00313 d2 = 0.0; 00314 nf = 0; /*End number */ 00315 00316 /* Loop through classes */ 00317 for (j = 1; j <= i; j++) { 00318 nd = nf; /*Start number */ 00319 nf = num[j]; 00320 co[j] = 10e37; 00321 eqdrt(x, xn, nd, nf, abc); 00322 den = sqrt(pow(abc[1], 2) + 1.0); 00323 nd++; 00324 /* Loop through observations */ 00325 for (k = nd; k <= nf; k++) { 00326 if (abc[2] == 0.0) 00327 d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den; 00328 else 00329 d = fabs(x[k] - abc[2]); 00330 d2 += pow(d, 2); 00331 if (x[k] - x[nd] < xlim) 00332 continue; 00333 if (x[nf] - x[k] < xlim) 00334 continue; 00335 if (d <= dmax) 00336 continue; 00337 dmax = d; 00338 nmax = k; 00339 } 00340 nd--; //A VERIFIER! 00341 if (x[nf] != x[nd]) { 00342 if (nd != 0) 00343 co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]); 00344 else 00345 co[j] = (xn[nf]) / (x[nf]); //A VERIFIER! 00346 } 00347 } 00348 if (i == 1) 00349 dd = d2; 00350 p = d2 / dd; 00351 for (j = 1; j <= i; j++) { 00352 no[j] = num[j]; 00353 zz[j] = x[num[j]] * rangemax + min; 00354 if (j == i) 00355 continue; 00356 if (co[j] > co[j + 1]) { 00357 zz[j] = zz[j] + rangemin; 00358 continue; 00359 } 00360 zz[j] = zz[j] - rangemin; 00361 no[j] = no[j] - 1; 00362 } 00363 im = i - 1; 00364 if (im != 0.0) { 00365 for (j = 1; j <= im; j++) { 00366 ji = i + 1 - j; 00367 no[ji] -= no[ji - 1]; 00368 } 00369 } 00370 if (nmax == 0) { 00371 break; 00372 } 00373 nff = i + 2; 00374 tmp = 0; 00375 for (j = 1; j <= i; j++) { 00376 jj = nff - j; 00377 if (num[jj - 1] < nmax) { 00378 num[jj] = nmax; 00379 tmp = 1; 00380 break; 00381 } 00382 num[jj] = num[jj - 1]; 00383 } 00384 if (tmp == 0) { 00385 num[1] = nmax; 00386 jj = 1; 00387 } 00388 if (jj == 1) { 00389 xnj_1 = 0; 00390 xj_1 = 0; 00391 } 00392 else { 00393 xnj_1 = xn[num[jj - 1]]; 00394 xj_1 = x[num[jj - 1]]; 00395 } 00396 no1 = (xn[num[jj]] - xnj_1) * n; 00397 no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n; 00398 f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1); 00399 f *= n; 00400 xt1 = (x[num[jj]] - xj_1) * f; 00401 xt2 = (x[num[jj + 1]] - x[num[jj]]) * f; 00402 if (xt2 == 0) { 00403 xt2 = rangemin / 2.0 / rangemax * f; 00404 xt1 -= xt2; 00405 } 00406 else if (xt1 * xt2 == 0) { 00407 xt1 = rangemin / 2.0 / rangemax * f; 00408 xt2 -= xt1; 00409 } 00410 00411 /* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */ 00412 if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2)) 00413 chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2); 00414 00415 } 00416 00417 /* Fill up classbreaks of i <=nbclass classes */ 00418 for (j = 0; j <= (i - 1); j++) 00419 classbreaks[j] = zz[j + 1]; 00420 00421 return (chi2); 00422 } 00423 00424 int class_frequencies(double *data, int count, int nbreaks, 00425 double *classbreaks, int *frequencies) 00426 { 00427 int i, j; 00428 double min, max; 00429 00430 min = data[0]; 00431 max = data[count - 1]; 00432 /* count cases in all classes, except for last class */ 00433 i = 0; 00434 for (j = 0; j < nbreaks; j++) { 00435 while (data[i] <= classbreaks[j]) { 00436 frequencies[j]++; 00437 i++; 00438 } 00439 } 00440 00441 /*Now count cases in last class */ 00442 for (i = i; i < count; i++) { 00443 frequencies[nbreaks]++; 00444 } 00445 00446 return (1); 00447 }