/***************************************************************************** * probability.c * * DESCRIPTION * This file implements some probabilty functions. * * HISTORY * 4/2007 Arthur A Taylor (MDL): Added to libaat * 6/2007 Tom R Shaw (OITC): Fixed MacOSX compile error * * NOTES ****************************************************************************/ #include #include #include "libaat.h" /* In __STRICT_ANSI__, neither erf(), nor erfc() are provided in math.h, so * one can either implement them, or put a #undef __STRICT_ANSI__ around the * #include and assume that the compiler has them. I chose to * provide them. */ #undef _PROVIDE_ERF #ifdef __STRICT_ANSI__ #define _PROVIDE_ERF #else #ifdef NEED_ERF #define _PROVIDE_ERF #endif #endif /* Added to support MacOSX standard config with erf(), nor erfc() being * provided in math.h. TRS */ #ifdef __APPLE_CC__ #undef _PROVIDE_ERF #endif #ifdef _PROVIDE_ERF /* Prototype for erfc (since erf() and erfc() reference each other */ static double erfc(double x); /***************************************************************************** * err() -- Arthur Taylor / MDL * * PURPOSE * To compute the Gaussian error function (see: * http://mathworld.wolfram.com/Erf.html). * erf(x) = 2/sqrt(pi)*integral(exp(-t^2),t,0,x) * = 1 - erfc(x) * Using the Maclaurin series we get: * = 2/sqrt(pi)*[x - x^3/3 + x^5/5*2! - x^7/7*3! + ...] * * ARGUMENTS * x = The desired value to compute erf(x) of (Input) * * RETURNS: double * * HISTORY * 1/2004 Steve Strand (see: * http://www.digitalmars.com/archives/cplusplus/3634.html * 4/2007 AAT (MDL) Commented. * * NOTES ****************************************************************************/ /* Calculate 12 significant figures. Can adjust some, but don't choose > 15 * figures (assuming 52 bit mantissa in a double). */ #define REL_ERROR 1E-12 #ifndef M_2_SQRTPI #define M_2_SQRTPI 1.12837916709551257390 #endif static double erf(double x) { double sum = x; /* The current sum in the Maclaurin series */ double term = x; /* The next term in the series. */ double xsqr; /* x^2, used to avoid repeated multiplications. */ int j = 1; /* The term we're working on */ /* Use continued fraction when fabs(x) > 2.2 */ if (fabs(x) > 2.2) { return 1.0 - erfc(x); } xsqr = x * x; do { term *= xsqr / (double)j; sum -= term / (double)(2 * j + 1); ++j; term *= xsqr / (double)j; sum += term / (double)(2 * j + 1); ++j; } while (fabs(term / sum) > REL_ERROR); return M_2_SQRTPI * sum; } /***************************************************************************** * erfc() -- Arthur Taylor / MDL * * PURPOSE * To compute the "complementary" error function (see: * http://mathworld.wolfram.com/Erfc.html). * erfc(x) = 2/sqrt(pi)*integral(exp(-t^2),t,x,inf) * = 1 - erf(x) * (http://functions.wolfram.com/GammaBetaErf/Erfc/10/01/0001/) * = exp(-x^2)/sqrt(pi) * [1/x+ (1/2)/x+ (2/2)/x+ (3/2)/x+ (4/2)/x+ ...] * (expression inside [] is a continued fraction so '+' means add to * denominator) * * Solving the [] with more and more terms reveals: * i=1) 1/x, * i=2) x/(x^2 + 1/2)), * i=3) (1 + x^2)/(x + x^3 + 1/2x), * Algorithm claims fraction changes by (numerator:n(i) / denominator:d(i)): * n(i) = n(i-2) * C + n(i-1) * x * d(i) = d(i-2) * C + d(i-1) * x * Where C = .5^(i-3) * * ARGUMENTS * x = The desired value to compute erfc(x) of (Input) * * RETURNS: double * * HISTORY * 1/2004 Steve Strand (see: * http://www.digitalmars.com/archives/cplusplus/3634.html * 4/2007 AAT (MDL) Commented. * * NOTES ****************************************************************************/ #define M_1_SQRTPI 0.56418958354775628695 static double erfc(double x) { double a = 1; /* Earlier numerator. */ double b = x; /* Later numerator. */ double c = x; /* Earlier denominator. */ double d = x * x + 0.5; /* Later denominator. */ double q1; /* The earlier aprox. (starts with 1/x (ignored)) */ double q2 = b / d; /* The later aprox. (starts with x/(x^2 + 1/2)) */ double n = 1.0; double t; /* Use series when fabs(x) < 2.2 */ if (fabs(x) < 2.2) { return 1.0 - erf(x); } /* continued fraction only valid for x > 0 */ if (x < 0) { return 2.0 - erfc(-x); } do { t = a * n + b * x; a = b; b = t; t = c * n + d * x; c = d; d = t; n += 0.5; q1 = q2; q2 = b / d; } while (fabs(q1 - q2) / q2 > REL_ERROR); return M_1_SQRTPI * exp(-x * x) * q2; } /* end of #ifdef _PROVIDE_ERF */ #endif /***************************************************************************** * normInv() -- Arthur Taylor / MDL * * PURPOSE * To compute the inverse to the normal cumulative distribution. See: * http://home.online.no/~pjacklam/notes/invnorm/ * http://mathworld.wolfram.com/InverseErf.html * * Note: http://en.wikipedia.org/wiki/Error_function * States that the error function (erf) is essentially identical to the * standard "normal cumulative distribution" function, Phi(x), as they differ * only by scaling and translation. Indeed: * Phi(x) = 1/2 (1 + erf(x/sqrt(2)) * * ARGUMENTS * x = The desired value to compute normInv(x) of (Input) * * RETURNS: double * * HISTORY * ?? Peter J. Acklam. * 4/2007 AAT (MDL) Commented. * * NOTES ****************************************************************************/ #ifndef M_PI #define M_PI 3.14159265358979323846 #endif static double normInv(double p) { static const double A[] = { -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 }; static const double B[] = { -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 }; static const double C[] = { -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 }; static const double D[] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 }; static const double p_low = 0.02425; static const double p_high = 0.97575; /* 1 - p_low */ double x; /* Temporary copy of the answer. */ double q; /* temp var 1 during approximation */ double r; /* temp var 2 during approximation */ double e; /* temp var 1 during refinement */ double u; /* temp var 2 during refinement */ /* Rational approximation for lower region. */ if ((p > 0) && (p < p_low)) { q = sqrt(-2 * log(p)); x = ((((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5]) / ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1)); /* Rational approximation for central region. */ } else if ((p >= p_low) && (p <= p_high)) { q = p - 0.5; r = q * q; x = ((((((A[0] * r + A[1]) * r + A[2]) * r + A[3]) * r + A[4]) * r + A[5]) * q / (((((B[0] * r + B[1]) * r + B[2]) * r + B[3]) * r + B[4]) * r + 1)); /* Rational approximation for upper region. */ } else if ((p > p_high) && (p < 1)) { q = sqrt(-2 * log(1 - p)); x = -((((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5]) / ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1)); } else { myWarn_Err1Arg("p is out of the valid range\n"); return 0; } /* Refine the answer using Halley's rational method (3rd order) */ e = 0.5 * erfc(-x / sqrt(2)) - p; u = e * sqrt(2 * M_PI) * exp(x * x / 2); x = x - u / (1 + x * u / 2); return x; } /***************************************************************************** * probNormDist() -- Arthur Taylor / MDL * * PURPOSE * To compute the normal cumulative distribution. See: * http://mathworld.wolfram.com/NormalDistribution.html (equation 3) * Returns the probabilty that a standard normal distribution assumes a value * in the interval of [0,x] (assuming a mean of 0, and standard dev of 1) * Phi(x) = 1/2 (1 + erf(x/sqrt(2)) * * ARGUMENTS * x = The desired value to the normal distribution of (Input) * * RETURNS: double * * HISTORY * 4/2007 AAT (MDL) Commented. * * NOTES ****************************************************************************/ double probNormDist(double x) { return (erf(x / sqrt(2)) / 2.); } /***************************************************************************** * probNormBound() -- Arthur Taylor / MDL * * PURPOSE * To compute "right" so that between left and right, there is "prob" * probability based on the normal distribution. * * ARGUMENTS * prob = The desired probability amount [0.0, 0.5] (Input) * left = The left edge of the area in question (Input) * * RETURNS: double * * HISTORY * 4/2007 AAT (MDL) Commented. * * NOTES ****************************************************************************/ double probNormBound(double prob, double left) { double lftProb; /* The amount of prob to associate from 0 to left */ if ((left < 0) || (prob > .5) || (prob < 0)) { myWarn_Err1Arg("need: left >= 0, 0 <= prob <= .5\n"); return .5; } lftProb = probNormDist(left); if (prob + lftProb > .5) { myWarn_Err1Arg("'left' is too far right for requested prob.\n"); return .5; } prob += lftProb; return normInv(prob + .5); } /***************************************************************************** * main() -- Arthur Taylor / MDL * * PURPOSE * To test the probability module functions. * * ARGUMENTS * argc = The number of arguments on the command line. (Input) * argv = The arguments on the command line. (Input) * * RETURNS: int * * HISTORY * 4/2007 AAT (MDL): Commented * * NOTES ****************************************************************************/ #ifdef DEBUG_PROB #include #include int main(int argc, char **argv) { double prob; double left; double x; double ans; if ((argc != 3) && (argc != 4)) { printf("usage %s \n", argv[0]); printf("usage %s \n", argv[0]); return 0; } if (strcmp("normBound", argv[1]) == 0) { if (argc != 4) { printf("usage %s \n", argv[0]); return 0; } prob = atof(argv[2]); left = atof(argv[3]); printf("prob = %f, left = %f\n", prob, left); ans = probNormBound(prob, left); printf("Answer = %f\n", ans); } else if (strcmp("normDist", argv[1]) == 0) { if (argc != 3) { printf("usage %s \n", argv[0]); return 0; } x = atof(argv[2]); ans = probNormDist(x); printf("Answer = %f\n", ans); } else { printf("usage %s \n", argv[0]); printf("usage %s \n", argv[0]); return 0; } return 0; } #endif