/* $Id: fit.c,v 1.2 2004/06/16 18:34:08 pturner Exp $ * * curve fitting, and other numerical routines used in compose. * * Contents: * * void gauss() - simple gauss elimination for least squares poly fit * void stasum() - compute mean and variance * void leasqu() - entry to linear or polynoimial regression routines * double leasev() - evaluate least squares polynomial * void fitcurve() - compute coefficients for a polynomial fit of degree >1 * void runavg() - compute a running average * void runstddev() - compute a running standard deviation * void runmedian() - compute a running median * void runminmax() - compute a running minimum or maximum * void filterser() - apply a digital filter * void linearconv() - convolve one set with another * int crosscorr() - cross/auto correlation * int linear_regression() - linear regression * void spline() - compute a spline fit * double seval() - evaluate the spline computed in spline() */ #include #include #include "defines.h" #include "noxprotos.h" static char buf[256]; /* simple gauss elimination - no pivoting or scaling strategies all matrices are sym-pos-def */ void gauss(int n, double *a, int adim, double *b, double *x) { int i, k, j; double mult; for (k = 0; k < n - 1; k++) { for (i = k + 1; i < n; i++) { mult = a[adim * i + k] / a[adim * k + k]; for (j = k + 1; j < n; j++) { a[adim * i + j] = a[adim * i + j] - mult * a[adim * k + j]; } b[i] = b[i] - mult * b[k]; } } for (i = n - 1; i >= 0; i--) { x[i] = b[i]; for (j = i + 1; j < n; j++) x[i] = x[i] - a[adim * i + j] * x[j]; x[i] = x[i] / a[adim * i + i]; } } /* compute mean and standard dev */ void stasum(double *x, int n, double *xbar, double *sd, int flag) { int i; *xbar = 0; *sd = 0; if (n < 1) return; for (i = 0; i < n; i++) *xbar = (*xbar) + x[i]; *xbar = (*xbar) / n; for (i = 0; i < n; i++) *sd = (*sd) + (x[i] - *xbar) * (x[i] - *xbar); if (n - flag) *sd = sqrt(*sd / (n - flag)); else { errwin("compmean: (n-flag)==0"); *sd = 0; } } /* least squares polynomial fit - for polynomials of degree 5 or less */ void leasqu(int n, double *x, double *y, int degree, double *w, int wdim, double *r) { double b[11]; double sumy1, sumy2, ybar, ysdev, stemp, rsqu; double xbar, xsdev; int i, j, k; sumy1 = 0.0; sumy2 = 0.0; /* form the matrix with normal equations and RHS */ for (k = 0; k <= degree; k++) { for (j = k; j <= degree; j++) { w[wdim * k + j] = 0.0; for (i = 0; i < n; i++) { if (x[i] != 0.0) w[wdim * k + j] = pow(x[i], (double) (k)) * pow(x[i], (double) (j)) + w[wdim * k + j]; } if (k != j) w[wdim * j + k] = w[wdim * k + j]; } } for (k = 0; k <= degree; k++) { b[k] = 0.0; for (i = 0; i < n; i++) { if (x[i] != 0.0) b[k] = b[k] + pow(x[i], (double) (k)) * y[i]; } } gauss(degree + 1, w, wdim, b, r); /* solve */ stasum(y, n, &ybar, &ysdev, 1); /* compute statistics on fit */ stasum(x, n, &xbar, &xsdev, 1); for (i = 0; i < n; i++) { stemp = 0.0; for (j = 1; j <= degree; j++) { if (x[i] != 0.0) stemp = stemp + r[j] * pow(x[i], (double) (j)); } sumy1 = sumy1 + (stemp + r[0] - y[i]) * (stemp + r[0] - y[i]); sumy2 = sumy2 + y[i] * y[i]; } rsqu = 1.0 - sumy1 / (sumy2 - n * ybar * ybar); if (rsqu < 0.0) { rsqu = 0.0; } sprintf(buf, "Number of observations = %10d\n", n); stufftext(buf, 0); sprintf(buf, "A[0] is the constant, A[i] is the coefficient for ith power of X\n", n); stufftext(buf, 0); for (i = 0; i <= degree; i++) { sprintf(buf, "A[%d] = %.9lg\n", i, r[i]); stufftext(buf, 0); } i += 4; sprintf(buf, "R square = %.7lg\n", rsqu); stufftext(buf, 0); sprintf(buf, "Avg Y = %.7lg\n", ybar); stufftext(buf, 0); sprintf(buf, "Sdev Y = %.7lg\n", ysdev); stufftext(buf, 0); sprintf(buf, "Avg X = %.7lg\n", xbar); stufftext(buf, 0); sprintf(buf, "Sdev X = %.7lg\n\n", xsdev); stufftext(buf, 0); stufftext("\n", 2); } /* evaluate least squares polynomial */ double leasev(double *c, int degree, double x) { double temp; int i; temp = 0.0; for (i = 0; i <= degree; i++) { if ((i == 0) && (x == 0.0)) temp = temp + c[i]; /* avoid 0.0^0 */ else temp = temp + c[i] * pow(x, (double) (i)); } return (temp); } /* * curve fitting */ void fitcurve(double *x, double *y, int n, int ideg, double *fitted) { int i, ifail; double result[20], w[MAXFIT * MAXFIT], leasev(double *c, int degree, double x); ifail = 1; if (ideg > 1) { leasqu(n, x, y, ideg, w, MAXFIT, result); for (i = 0; i < n; i++) { fitted[i] = leasev(result, ideg, x[i]); } ifail = 0; } else { ifail = linear_regression(n, x, y, fitted); if (ifail == 1) { errwin("Linear_regression entered with N <= 3"); } else if (ifail == 2) { errwin("Linear_regression - all values of x or y are the same"); } } } /* compute a running average */ void runavg(double *x, double *y, double *ax, double *ay, int n, int ilen) { int i; double sumy = 0.0; double sumx = 0.0; for (i = 0; i < ilen; i++) { sumx = sumx + x[i]; sumy = sumy + y[i]; } ax[0] = sumx / ilen; ay[0] = sumy / ilen; for (i = 1; i < (n - ilen + 1); i++) { sumx = x[i + ilen - 1] - x[i - 1] + sumx; ax[i] = sumx / ilen; sumy = y[i + ilen - 1] - y[i - 1] + sumy; ay[i] = sumy / ilen; } } /* compute a running standard deviation */ void runstddev(double *x, double *y, double *ax, double *ay, int n, int ilen) { int i; double ybar, ysd; double sumx = 0.0; for (i = 0; i < ilen; i++) { sumx = sumx + x[i]; } ax[0] = sumx / ilen; stasum(y, ilen, &ybar, &ysd, 0); ay[0] = ysd; for (i = 1; i < (n - ilen + 1); i++) { stasum(y + i, ilen, &ybar, &ysd, 0); sumx = x[i + ilen - 1] - x[i - 1] + sumx; ax[i] = sumx / ilen; ay[i] = ysd; } } /* compute a running median */ void runmedian(double *x, double *y, double *ax, double *ay, int n, int ilen) { int i, j, nlen = n - ilen + 1; double *tmpx, *tmpy; tmpx = (double *) calloc(ilen, sizeof(double)); if (tmpx == NULL) { errwin("Can't calloc tmpx in runmedian"); return; } tmpy = (double *) calloc(ilen, sizeof(double)); if (tmpy == NULL) { errwin("Can't calloc tmpy in runmedian"); cxfree(tmpx); return; } for (i = 0; i < nlen; i++) { for (j = 0; j < ilen; j++) { tmpx[j] = x[j + i]; tmpy[j] = y[j + i]; } sort_xy(tmpx, tmpy, ilen, 1, 0); if (ilen % 2) { ax[i] = x[i + (ilen / 2)]; ay[i] = tmpy[ilen / 2]; } else { ax[i] = (x[i + ilen / 2] + x[i + (ilen - 1) / 2]) * 0.5; ay[i] = (tmpy[ilen / 2] + tmpy[(ilen - 1) / 2]) * 0.5; } } cxfree(tmpx); cxfree(tmpy); } /* compute a running minimum or maximum */ void runminmax(double *x, double *y, double *ax, double *ay, int n, int ilen, int type) { int i, j; double min, max; double sumx = 0.0; min = max = y[0]; for (i = 0; i < ilen; i++) { sumx = sumx + x[i]; if (min > y[i]) min = y[i]; if (max < y[i]) max = y[i]; } ax[0] = sumx / ilen; if (type == 0) { ay[0] = min; } else if (type == 1) { ay[0] = max; } else { errwin("Unknown type in runminmax, setting type = min"); type = 0; } for (i = 1; i < (n - ilen + 1); i++) { sumx = x[i + ilen - 1] - x[i - 1] + sumx; ax[i] = sumx / ilen; min = y[i]; max = y[i]; for (j = 0; j < ilen; j++) { if (min > y[i + j]) min = y[i + j]; if (max < y[i + j]) max = y[i + j]; } if (type == 0) { ay[i] = min; } else if (type == 1) { ay[i] = max; } } } /* Apply a digital filter of length len to a set in x, y, of length n with the results going to resx, resy. the length of the result is set by the caller */ void filterser(int n, double *x, double *y, double *resx, double *resy, double *h, int len) { int i, j, outlen, eo, ld2; double sum; outlen = n - len + 1; eo = len % 2; ld2 = len / 2; for (i = 0; i < outlen; i++) { sum = 0.0; for (j = 0; j < len; j++) { sum = sum + h[j] * y[j + i]; } resy[i] = sum; if (eo) resx[i] = x[i + ld2]; else resx[i] = (x[i + ld2] + x[i + ld2 - 1]) / 2.0; } } /* linear convolution of set x (length n) with h (length m) and result to y. the length of y is set by the caller */ void linearconv(double *x, double *h, double *y, int n, int m) { int i, j, itmp; for (i = 0; i < n + m - 1; i++) { for (j = 0; j < m; j++) { itmp = i - j; if ((itmp >= 0) && (itmp < n)) { y[i] = y[i] + h[j] * x[itmp]; } } } } /* * cross correlation */ int crosscorr(double *x, double *y, int n, int lag, int meth, double *xcov, double *xcor) { double xbar, xsd; double ybar, ysd; int i, j; if (lag >= n) return 1; stasum(x, n, &xbar, &xsd, 0); if (xsd == 0.0) return 2; stasum(y, n, &ybar, &ysd, 0); if (ysd == 0.0) return 3; for (i = 0; i < lag; i++) { xcor[i] = 0.0; xcov[i] = 0.0; for (j = 0; j < n - i; j++) { xcov[i] = xcov[i] + (y[j] - ybar) * (x[j + i] - xbar); } if (meth) xcov[i] = xcov[i] / (n - i); else xcov[i] = xcov[i] / n; xcor[i] = xcov[i] / (xsd * ysd); } return 0; } /* * References, * * _Applied Linear Regression_, Weisberg * _Elements of Statistical Computing_, Thisted * * Fits y = coef*x + intercept + e * * uses a 2 pass method for means and variances * */ int linear_regression(int n, double *x, double *y, double *fitted) { double xbar, ybar; /* sample means */ double sdx, sdy; /* sample standard deviations */ double sxy, rxy; /* sample covariance and sample correlation */ double SXX, SYY, SXY; /* sums of squares */ double RSS; /* residual sum of squares */ double rms; /* residual mean square */ double sereg; /* standard error of regression */ double seslope, seintercept; double slope, intercept; /* */ double SSreg, F, R2; int i; if (n <= 3) { return 1; } xbar = ybar = 0.0; SXX = SYY = SXY = 0.0; for (i = 0; i < n; i++) { xbar = xbar + x[i]; ybar = ybar + y[i]; } xbar = xbar / n; ybar = ybar / n; for (i = 0; i < n; i++) { SXX = SXX + (x[i] - xbar) * (x[i] - xbar); SYY = SYY + (y[i] - ybar) * (y[i] - ybar); SXY = SXY + (x[i] - xbar) * (y[i] - ybar); } sdx = sqrt(SXX / (n - 1)); sdy = sqrt(SYY / (n - 1)); if (sdx == 0.0) { return 2; } if (sdy == 0.0) { return 2; } sxy = SXY / (n - 1); rxy = sxy / (sdx * sdy); slope = SXY / SXX; intercept = ybar - slope * xbar; RSS = SYY - slope * SXY; rms = RSS / (n - 2); sereg = sqrt(RSS / (n - 2)); seintercept = sqrt(rms * (1.0 / n + xbar * xbar / SXX)); seslope = sqrt(rms / SXX); SSreg = SYY - RSS; F = SSreg / rms; R2 = SSreg / SYY; sprintf(buf, "Number of observations\t\t\t = %d\n", n); stufftext(buf, 0); sprintf(buf, "Mean of independent variable\t\t = %.7g\n", xbar); stufftext(buf, 0); sprintf(buf, "Mean of dependent variable\t\t = %.7g\n", ybar); stufftext(buf, 0); sprintf(buf, "Standard dev. of ind. variable\t\t = %.7g\n", sdx); stufftext(buf, 0); sprintf(buf, "Standard dev. of dep. variable\t\t = %.7g\n", sdy); stufftext(buf, 0); sprintf(buf, "Correlation coefficient\t\t\t = %.7g\n", rxy); stufftext(buf, 0); sprintf(buf, "Regression coefficient (SLOPE)\t\t = %.7g\n", slope); stufftext(buf, 0); sprintf(buf, "Standard error of coefficient\t\t = %.7g\n", seslope); stufftext(buf, 0); sprintf(buf, "t - value for coefficient\t\t = %.7g\n", slope / seslope); stufftext(buf, 0); sprintf(buf, "Regression constant (INTERCEPT)\t\t = %.7g\n", intercept); stufftext(buf, 0); sprintf(buf, "Standard error of constant\t\t = %.7g\n", seintercept); stufftext(buf, 0); sprintf(buf, "t - value for constant\t\t\t = %.7g\n", intercept / seintercept); stufftext(buf, 0); sprintf(buf, "\nAnalysis of variance\n"); stufftext(buf, 0); sprintf(buf, "Source\t\t d.f\t Sum of squares\t Mean Square\t F\n"); stufftext(buf, 0); sprintf(buf, "Regression\t 1\t%.7g\t%.7g\t%.7g\n", SSreg, SSreg, F); stufftext(buf, 0); sprintf(buf, "Residual\t%5d\t%.7g\t%.7g\n", n - 2, RSS, RSS / (n - 2)); stufftext(buf, 0); sprintf(buf, "Total\t\t%5d\t%.7g\n\n", n - 1, SYY); stufftext(buf, 2); for (i = 0; i < n; i++) { fitted[i] = slope * x[i] + intercept; } return 0; } /* a literal translation of the spline routine in Forsyth, Malcolm, and Moler */ void spline(int n, double *x, double *y, double *b, double *c, double *d) { /* c c the coefficients b(i), c(i), and d(i), i=1,2,...,n are computed c for a cubic interpolating spline c c s(x) = y(i) + b(i)*(x-x(i)) + c(i)*(x-x(i))**2 + d(i)*(x-x(i))**3 c c for x(i) .le. x .le. x(i+1) c c input.. c c n = the number of data points or knots (n.ge.2) c x = the abscissas of the knots in strictly increasing order c y = the ordinates of the knots c c output.. c c b, c, d = arrays of spline coefficients as defined above. c c using p to denote differentiation, c c y(i) = s(x(i)) c b(i) = sp(x(i)) c c(i) = spp(x(i))/2 c d(i) = sppp(x(i))/6 (derivative from the right) c c the accompanying function subprogram seval can be used c to evaluate the spline. c c */ int nm1, ib, i; double t; /* Gack! */ x--; y--; b--; c--; d--; /* Fortran 66 */ nm1 = n - 1; if (n < 2) return; if (n < 3) goto l50; /* c c set up tridiagonal system c c b = diagonal, d = offdiagonal, c = right hand side. c */ d[1] = x[2] - x[1]; c[2] = (y[2] - y[1]) / d[1]; for (i = 2; i <= nm1; i++) { d[i] = x[i + 1] - x[i]; b[i] = 2.0 * (d[i - 1] + d[i]); c[i + 1] = (y[i + 1] - y[i]) / d[i]; c[i] = c[i + 1] - c[i]; } /* c c end conditions. third derivatives at x(1) and x(n) c obtained from divided differences c */ b[1] = -d[1]; b[n] = -d[n - 1]; c[1] = 0.0; c[n] = 0.0; if (n == 3) goto l15; c[1] = c[3] / (x[4] - x[2]) - c[2] / (x[3] - x[1]); c[n] = c[n - 1] / (x[n] - x[n - 2]) - c[n - 2] / (x[n - 1] - x[n - 3]); c[1] = c[1] * d[1] * d[1] / (x[4] - x[1]); c[n] = -c[n] * d[n - 1] * d[n - 1] / (x[n] - x[n - 3]); /* c c forward elimination c */ l15:; for (i = 2; i <= n; i++) { t = d[i - 1] / b[i - 1]; b[i] = b[i] - t * d[i - 1]; c[i] = c[i] - t * c[i - 1]; } /* c c back substitution c */ c[n] = c[n] / b[n]; for (ib = 1; ib <= nm1; ib++) { i = n - ib; c[i] = (c[i] - d[i] * c[i + 1]) / b[i]; } /* c c c(i) is now the sigma(i) of the text c c compute polynomial coefficients c */ b[n] = (y[n] - y[nm1]) / d[nm1] + d[nm1] * (c[nm1] + 2.0 * c[n]); for (i = 1; i <= nm1; i++) { b[i] = (y[i + 1] - y[i]) / d[i] - d[i] * (c[i + 1] + 2.0 * c[i]); d[i] = (c[i + 1] - c[i]) / d[i]; c[i] = 3.0 * c[i]; } c[n] = 3.0 * c[n]; d[n] = d[n - 1]; return; l50:; b[1] = (y[2] - y[1]) / (x[2] - x[1]); c[1] = 0.0; d[1] = 0.0; b[2] = b[1]; c[2] = 0.0; d[2] = 0.0; return; } double seval(int n, double u, double *x, double *y, double *b, double *c, double *d) { /* c c this subroutine evaluates the cubic spline function c c seval = y(i) + b(i)*(u-x(i)) + c(i)*(u-x(i))**2 + d(i)*(u-x(i))**3 c c where x(i) .lt. u .lt. x(i+1), using horner's rule c c if u .lt. x(1) then i = 1 is used. c if u .ge. x(n) then i = n is used. c c input.. c c n = the number of data points c u = the abscissa at which the spline is to be evaluated c x,y = the arrays of data abscissas and ordinates c b,c,d = arrays of spline coefficients computed by spline c c if u is not in the same interval as the previous call, then a c binary search is performed to determine the proper interval. c */ int j, k; double dx; int i; /* c c binary search c */ if (u < x[0]) { i = 0; } else if (u >= x[n - 1]) { i = n - 1; } else { i = 0; j = n; l20: ; k = (i + j) / 2; if (u < x[k]) j = k; if (u >= x[k]) i = k; if (j > i + 1) goto l20; } /* c c evaluate spline c */ l30:; dx = u - x[i]; return (y[i] + dx * (b[i] + dx * (c[i] + dx * d[i]))); } /* * compute ntiles */ void ntiles(double *x, double *y, int n, int nt, double *resx, double *resy) { int i, j = 0; double *tmpx, *tmpy, div; tmpx = (double *) calloc(n, sizeof(double)); if (tmpx == NULL) { errwin("Can't calloc tmpx in percentiles"); return; } tmpy = (double *) calloc(n, sizeof(double)); if (tmpy == NULL) { errwin("Can't calloc tmpy in percentiles"); cxfree(tmpx); return; } for (i = 0; i < n; i++) { tmpx[i] = x[i]; tmpy[i] = y[i]; } sort_xy(tmpx, tmpy, n, 1, 0); for (i = 0; i < nt; i++) { div = 1.0 / nt; resx[j] = (i + 1) * n * div; resy[j] = tmpy[(i * n) / nt]; j++; } cxfree(tmpx); cxfree(tmpy); } /* * interpolate (xr, yr) using x from (x, y) * */ void interp_pts(double *x, double *y, int n1, double *xr, double *yr, int n2) { }