/* This file contains libraries of statistical functions, including
   those for:
  
    * Error function
    * Inverse error function
    * Normal/Gaussian distribution
    * Inverse functions for Normal distribution
    * Log-Normal distribution
    * Binomial distribution
    * Poisson distribution.
    * Weibull distribution
    * Exponential distribution
  
    TODO:  These functions don't yet work for arbitrary precision (e.g.
    phi[9] gives 0.9999999999999999999 and larger than that just gives 1).
    Some functions claim to be able to calculate to a large number of digits,
    but that sometimes doesn't happen.  (This should include and use
    ArbitraryPrecision.frink for those cases.)
  
    However, many of these functions now dispatch correctly to correct limiting
    functions, so if you set precision properly, you can get a good answer.
    (See the dispatching functions erf, erfc, inverseErf, and inverseErfc
     for examples of choosing the right function for various ranges.)
  
    for example, if you call:
  
      setPrecision[40]
      a = phi[9.4]
  
    You will get:
  
       0.9999999999999999999972718464286538660795
  
    And then calling:
  
       inversePhi[a]
  
    You will get:
        9.4000000000000007230366609333197
  
    Which is a reasonable answer to lots of decimal places.  You may need to
    request even more working precision.  For example, phi[12], calculated with
    40 digits of precision is:
  
    0.999999999999999999999999999999998223518
  
    While phi[13] gets dangerously close to losing all information with
    40 digits of precision:
  
    phi[13]
    0.999999999999999999999999999999999999994
   
    While any statistical function with this much confidence is totally
    suspect, it's sometimes useful to be able to calculate how silly that
    some statistical claims really are with lots of digits.
*/

/** Error function erf, complementary error function erfc, and their
   inverses.

   Implementation of the error function erf[x] and the complementary error
   function erfc[x] where erfc[x] = 1 - erf[x].

   Error function erf[x]
   This is a "smart" dispatcher function that attempts to call the right
   function to calculate for a given range.  This is the function that users
   should call.
*/
erf[x] := (x<0 ? -erf[-x] : ((abs[x] > 4.0) ? 1-erfcAsymptotic[x] : erfTaylor[x]))

/** Complementary error function erfc[x] where erfc[x] = 1 - erf[x]
    This dispatches to a "smart" dispatcher function that attempts to call the
    right function to calculate for a given range.  This is the function that
    users should call.  Note that this is 1-erf[x], but fixed slightly so that
    we don't underflow and lose precision if erf[x] is tiny.
*/
erfc[x] := (x<0 ? 2 - erfc[-x] : ((abs[x] > 4.0) ? erfcAsymptotic[x] : 1-erfTaylor[x]))

// Inverse error function.  This is a dispatcher function that calls the
// best algorithm depending on the size of the number.  This is the function
// that users should call.
inverseErf[z, digits=getPrecision[]] := z < 0 ? -inverseErf[-z, digits] : (z > .96 ? inverseErfcFettis[1-z] : inverseErfForward[z,digits])

// Complementary inverse error function.
inverseErfc[z, digits=getPrecision[]] := z < 0 ? -inverseErfc[-z, digits] : (z < .04 ? inverseErfcFettis[z] : inverseErfForward[1-z,digits])

/* =====================================================
   Functions for the normal/Gaussian distribution.

*/

// The probability distribution function or probability mass function
// for the normal distribution.   You can, say, use this to plot the typical
// bell curve.  Note, however, that it's the *integral* of this function that
// you generally want to work with, which is represented by the phi function
// below.
normalDensity[x, mean, sd] :=  1/(sd * sqrt[2 pi]) exp[-(x-mean)^2/(2 sd^2)]


// This is the cumulative distribution function for the normal/Gaussian
// distribution.
//
// Calculate the probability (from 0 to 1) of a score *less than or equal to* x
// in a normally-distributed distribution with the given mean and standard
// deviation.
// For example, to find the percentile score of an IQ of 130, given the
// assumption that IQ has mean of 100 and sd of 15, use:
//   phi[130, 100, 15]
// which gives 0.977, or the 97.7th percentile.
phi[x, mean, sd] := phi[(x-mean)/sd]

// An alias for phi[x, mean, sd]
normalProbability[x, mean, sd] := phi[x, mean, sd]


// Calculate the probability that a random value z has a value less than or
// equal to x, assuming x is normally distributed, with a mean of 0 and a
// standard deviation of 1.
// This is fundamental to lots of statistics.  It is essentially the integral
// of the normal curve from negative infinity to z.
phi[z] := 1/2 (1 + erf[z/sqrt[2]])

// Inverse functions for the normal distribution.

// Inverse normal probability.  This gives a z-score (number of standard
// deviations above or below the mean).  So, for example, inversePhi[0.5]
// gives a z-score of 0, as the 50th percentile is right at the mean.
inversePhi[phi, digits=getPrecision[]] := sqrt[2] inverseErf[2 phi - 1, digits]

// An alias for inversePhi[phi]
inverseNormalProbability[phi, digits=getPrecision[]] := inversePhi[phi, digits]

// Inverse phi function for a distribution with the specified mean and
// standard deviation.  For example, the IQ of someone in the 98th percentile
// can be calculated with the following, assuming the mean IQ is 100 and the
// IQ scale has a standard deviation of 15:
//   inversePhi[0.98, 100, 15] 
inversePhi[phi, mean, sd] := mean + inversePhi[phi] * sd

// An alias for inversePhi
inverseNormalProbability[phi, mean, sd] := inversePhi[phi, mean, sd]


/** Calculates the mean and standard deviation of an array or
    enumerating expression.

    This uses Welford's algorithm as cited in Knuth, The Art of Computer
    Programming, Vol. 2, 3rd edition, page 232.

    Arguments:
     [list, sample]

    where
     list: is an array or enumerating expression of the items to average.
     sample: a boolean flag indicating if we want the standard deviation to be
         the sample standard variation (=true)  or the population standard
         variation (=false).

    Returns:
     [mean, sd, number]
*/
meanAndSD[list, sample] :=
{
   M = 0 list@0 // Make units work right
   S = 0 (list@0)^2 // Make units work out right
   k = 1

   for v = list
   {
      oldM = M
      diff = v-oldM
      M = M + diff / k
      S = S + diff * (v - M)
      k = k+1
   }

   if sample == true
      sub = 1            // Sample standard deviation, subtract 1 from num
   else
      sub = 0            // Population standard deviation
   
   return [M, sqrt[S/(k-1-sub)], k-1]
}


/* =====================================================
   Functions for the log-normal distribution.

A log-normal distribution is a probability distribution of a random variable
whose logarithm is normally distributed. If X is a random variable with a
normal distribution, then Y = exp(X) has a log-normal distribution; likewise,
if Y is log-normally distributed, then X = log(Y) is normally distributed.
*/

/*
   The probability density function or probability mass function
for the log-normal distribution.
*/

logNormalDensity[x, mean, sd] :=
{
   1/(x sqrt[2 pi sd^2]) e^-((ln[x] - mean)^2 / (2 sd^2))
}

/** The cumulative distribution function for the log-normal distribution. */
logNormalProbability[x, mean, sd] := 1/2 (1 + erf[(ln[x] - mean)/sqrt[2 sd^2]])


/* ===================================================
 Functions for the Binomial distribution

 This gives the probability density function or probability mass function
 of the binomial distribution.  The function below gives the probability of
 getting *exactly* k successes in n trials, where the probability of success
 of a single trial is p.

 (If the number of trials n is large and the probability low, the Binomial
 distribution approaches the Poisson distribution (see below.).  To be more
 precise, binomialDensity[k, n, p] approaches poissonDensity[k, n * p])

 For example, the probability of getting exactly 8 heads out of 10 coin
 flips is given by:
 
 binomialDensity[8, 10, 1/2]
*/
binomialDensity[k, n, p] :=  binomial[n, k] p^k (1-p)^(n-k)


/* This gives the cumulative probability of getting x *or fewer* successes in
   n trials, where the probability of success of a single trial is p.

   For example, the probability of getting 8 or fewer heads out of 10 coin
   flips is given by:
 
   binomialProbability[8, 10, 1/2]

   (If the number of trials n is large and the probability low, the Binomial
   distribution approaches the Poisson distribution (see below.).  To be more
   precise, binomialProbability[x, n, p] approaches
             poissonProbability[x, n * p])
*/
binomialProbability[x, n, p] :=
{
   sum = 0
   for i = 0 to floor[x]
      sum = sum + binomialDensity[i, n, p]

   return sum
}

/* binomialProbabilityNew[x, n, p] :=
{
   /** This is a slightly optimized version of the naive:

   for i = 0 to floor[x]
      sum = sum + binomialDensity[i, n, p]

   but for large number of iterations it isn't significantly faster.  Most
   of the time is spent because integers and rational numbers get HUGE.
   */

   sum = 0
   ratio = p/(1-p)
   term = (1-p)^n
   for i = 0 to floor[x]
   {
      sum = sum + binomial[n, i] term
      term = term * ratio
   }

   return sum
}
*/


/* ===================================================================
Functions for the Poisson distribution.

The Poisson distribution is a discrete probability distribution that expresses
the probability of a number of events occurring in a fixed period of time if
these events occur with a known average rate and independently of the time
since the last event.

See the exponential distribution functions below for predictions of when
events are likely to happen.
*/

/* This gives the probability mass function or probability density function of
the Poisson distribution.  This gives the probability of getting *exactly* k
occurrences of an event, given the expected number of occurrences (expected)
in that interval. */
poissonDensity[k, expected] :=  expected^k e^(-expected) / k!

/*
This gives the cumulative probability or cumulative distribution function of
the Poisson distribution.  This gives the probability of getting k *or fewer*
occurrences of the event, given the expected number of occurrences (expected)
in that interval.
*/
poissonProbability[k, expected] :=
{
   if k <0
      return 0
   
   // Since we're going to be multiplying by a transcendental (e^-expected)
   // we can use floating-point through all calculations.
   sum = 1.
   num = 1.
   denom = 1.

   // This loop is a faster way of doing sum(i=1 to k,  expected^i / i!)
   for i=1 to k
   {
       num = num * expected
       denom = denom * i
       sum = sum + num/denom
   }
   
   return sum * e^(-expected)
}


/* ===================================================================
Functions for the Exponential distribution.

The exponential distribution is a discrete probability distribution that
expresses the time between events in a Poisson process, i.e. a process in
which events occur continuously and independently at a constant average rate.

See the Poisson distribution functions above for functions related to the
probabilities of *multiple* events with an exponential distribution occurring.
*/

/* This gives the probability mass function or probability density function of
the exponential distribution.  This gives the probability of an event
occurring *exactly* at time x, given the expected number of occurrences
(expected) in a given time.  You will usually not want to use this function
but the cumulative distribution function exponentialProbability below. */
exponentialDensity[x, expected] := expected e^(-expected x)

/* This gives the cumulative distribution function of the exponential
distribution.  This gives the probability of an event occurring at time x *or
before*, given the expected number of occurrences (expected) in a given
time. */
exponentialProbability[x, expected] :=  1 - e^(-expected x)


/* ===================================================================
Functions for the Weibull distribution.

The Weibull distribution is a continuous probability distribution.  It is a
two-parameter distribution often used for time-to-failure analysis.

It is identical to the exponential distribution (see above) when k=1,
and the Rayleigh distribution when k=2.

   lambda is sometimes called the "scale" parameter, and is analogous/identical
to the "expected" value in the exponential and Poisson distributions.

   k is sometimes called the "shape" parameter.

If the quantity x is a "time-to-failure", the Weibull distribution gives a
distribution for which the failure rate is proportional to a power of
time. The shape parameter, k, is that power plus one, and so this parameter
can be interpreted directly as follows:

   * A value of k<1 indicates that the failure rate decreases over time. This
     happens if there is significant "infant mortality", or defective items
     failing early and the failure rate decreasing over time as the defective
     items are weeded out of the population.

   * A value of k=1 indicates that the failure rate is constant over
     time. This might suggest random external events are causing mortality, or
     failure.

   * A value of k>1 indicates that the failure rate increases with time. This
     happens if there is an "aging" process, or parts that are more likely to
     fail as time goes on.

   See: http://en.wikipedia.org/wiki/Weibull_distribution
*/

/* This gives the probability mass function or probability density function of
   the Weibull distribution.   */
WeibullDensity[x, lambda, k] :=
{
   if x < 0
      return 0
   else
      return k/lambda (x/lambda)^(k-1) e^-(x/lambda)^k
}

/* This gives the cumulative distribution function of the Weibull
   distribution. */
WeibullProbability[x, lambda, k] := 1 - e^-(x/lambda)^k


/* ===================================================================
Functions for the Student's t distribution.

   Student's-t distribution is a continuous distribution.  It is primarily used
   when sampling from a normally-distributed population whose standard
   deviation is unknown. It is used in the Student’s t-test and for calculating
   confidence intervals for the difference between two population means, 
*/

/* This calculates the probability mass function or probability density
   function of the StudentT distribution for a normalized t parameter and
   specified degrees of freedom (the degrees of freedom is usually 1 less
   than the number of items sampled.) */
studentTDensity[t, degreesFreedom] :=
{
   return studentTCoeff[degreesFreedom] (1 + t^2/degreesFreedom)^(-(degreesFreedom+1)/2)
}

/** This gives the cumulative distribution function of the Student-t
    distribution with the specified normalized t parameter and degrees of
    freedom. */
studentTProbability[t, degreesFreedom] :=
{
   // AUUUUGHHHHH.  What a pain in the ass this is to calculate.
}

// ===================================================================
// Functions below this point should probably not be called directly,
// as they may not behave well for all numerical values.  Use the functions
// above which automatically select the proper functions to call for different
// numeric ranges.


// Asymptotic series expansion for erfc.
// See :
// http://en.wikipedia.org/wiki/Error_function#Asymptotic_expansion

// Unfortunately, the algorithm below diverges for all values of x.
// It is able to produce more digits for large values of x before it
// begins to diverge, and is practically useless for smaller x.
//
// Using this function to calculate erf is generally better than the Taylor
// series for erf[4.0] and above.
erfcAsymptotic[x, debug = false, stats = false, digits=getPrecision[]] :=
{
   c = e^(-x^2)/(x sqrt[pi])
   if (debug)
      println["c is $c"]

   sum = 1
   precision = 10^(-digits)

   if (debug)
      println["Attempting to get precision less than $precision"]

   var term = 10000000
   var n = 0
   var lastterm
   
   do
   {
      lastterm = term
      n = n + 1
      term = (-1)^n * (2n)!/(n! (2x)^(2n))
      if (debug)
         println[term]
      sum = sum + term
   } while (abs[term/sum] > precision) and (abs[term] < abs[lastterm])

   // Did we terminate because the series started to diverge?
   if (abs[term] > abs[lastterm])
   {
      digits = -log[abs[term]]  // Adjust the number of useful digits
      if (debug)
         println["Warning:  started to diverge.  Precision will be no better than " + format[digits, 1, 1] + " digits."]
      sum = sum - term    // Back out last diverging term.
   }

   if (debug)
      println["Took $n iterations."]

   if (stats)
      return [sum * c, n, format[digits,1,3]]
   else
      return sum * c
}


// Taylor series expansion for erf[x].  See:
//
// http://en.wikipedia.org/wiki/Error_function
//
// This begins to fall apart at x=4.0 or so, and gets really bad above
// 5.0.
//
// This function should not be called directly, but call the erf[] dispatcher
// function above.
erfTaylor[x, debug = false, stats = false, digits=getPrecision[]] :=
{
   // This special case is necessary because the Taylor series just
   // sits at 0 for all iterations.
   if x == 0
      return 0

   c = 2 / sqrt[pi]
   sum = 0

   precision = 10^(-digits)

   if (debug)
      println["Attempting to get precision less than $precision"]

   var term = 10000000
   n = -1

   var lastterm
   
   do
   {
      lastterm = term
      n = n + 1
      term = (-1)^n * x^(2n+1)/((2n+1) n!)
      if (debug)
         println[term]
      sum = sum + term
   } while (abs[term/sum] > precision)

   if (debug)
      println["Took $n iterations."]

   if (stats)
      return [sum * c, n, format[digits,1,3]]
   else
      return sum * c
}


// Inverse of the Erf function
// See
//   http://functions.wolfram.com/GammaBetaErf/InverseErf/06/01/0001/
// for an explanation of the power series.
// This algorithm works but is bloody slow for z > 0.9
// It is currently rejected in favor of the algorithms due to Fettis below.
inverseErfForward[z, digits=getPrecision[], debug = false, stats = false] :=
{
   if (z==0)                    // Special case to avoid divide-by-zero
      return 0
   
   var sum = 0.
   var precision = 10^(-digits)

   var term = 10000000

   var c = sqrt[pi]/2
   var k = 0
   
   do
   {
      term = InverseErfHelper.getCk[k] (c z)^(2k + 1) / (2k+1)
      if (debug)
         println[term]
      sum = sum + term
      k = k + 1
   } while (abs[term/sum] > precision)

   if (stats)
      return [sum, k, format[digits,1,3]]
   else
      return sum
}

// This algorithm works but is bloody slow for n<0.1
inverseErfc[z, digits=getPrecision[], debug=false] :=
{
   var term
   var k = 0
   var c = sqrt[pi]/2
   var sum = 0.
   var precision = 10^(-digits)
   
   do
   {
      term = InverseErfHelper.getCk[k] (c * (z-1))^(2k+1) / (2k+1)
      if (debug)
         println[term]
      sum = sum + term
      k = k + 1
   } while (abs[term/sum] > precision)

   return -sum
}


// Asymptotic expansion of inverseErf which works for values as z->1.
// This doesn't seem to work well and probably shouldn't be trusted.
inverseErfAsymptotic[z] := (1/sqrt[2]) sqrt[log[2/(pi (z - 1)^2)] - log[log[2/(pi (z - 1)^2)]]]


// Algorithm given by "A Stable Algorithm for Computing the Inverse Error
// Function in the 'Tail-End' Region", Henry E. Fettis,  Mathematics of
// Computation, Volume 28, Number 126, April 1974, Pages 585-587
// http://www.ams.org/journals/mcom/1974-28-126/S0025-5718-1974-0341812-5/S0025-5718-1974-0341812-5.pdf
inverseErfcFettis[x] :=
{
   y = (-ln[pi^(1/2) x (-ln[x])^(1/2)])^(1/2)
//   println["Initial y is $y"]
   sqrtpi = sqrt[pi]

   do
   {
      oldy = y
      y = (ln[FettisG[y] / (sqrtpi x)])^(1/2)
//      println["Next y is $y"]
   } while abs[y - oldy]/y > 1e-15
   return y
}

// Build continued fraction from Fettis paper.
FettisG[y] :=
{
   t = 1/y
   return t / (1 + FettisG[t^2 / 2, 1])
}

// Recursively calculate parts of continued fraction.
// TODO:  Make limits dynamic.  This function actually requires *fewer*
// iterations as the numbers get larger.
// Since we're currently working with a fixed number of iterations,
// the continued fraction could be built bottom-up and non-recursively.
FettisG[tt,n] :=
{
   if n > 87                    // Good enough for z=0.96; TODO:  Make dynamic
      return 1.
   else
      return n tt / (1. + FettisG[tt, n+1])
}

//for n = 1 to 80
//   println[FettisG[(1-0.91), n]]


// This class has methods to cache the values of c_k which are used repeatedly
// in calculating the inverse Erf function.
class InverseErfHelper
{
   class var ck = new array
   class var summn = new dict[]

   class getCk[k] :=
   {
      len = length[ck]

      if (len == 0)
      {
         ck@0 = 1
         ck@1 = 1
         len = len + 2
      }

      // If cached, return it
      if len > k
         return ck@k

      var sum
      var m
      
      for n = len to k
      {
         sum = 0.

         // Calculate for all needed values
         // Some recalculation is done here.  This could be made more
         // efficient.
         for m = 0 to n-1
            sum = sum + (ck@m ck@(n-1-m))/((m+1)(2m + 1))

         ck@n = sum
//         if n > 0
//            println["ck@$n = $sum, ratio = " + (ck@n / ck@(n-1))]
      }

      return ck@k
   }
}

/** Calculate the coefficients for Student's T distribution.  This is a
    shortcut which eliminates the need to calculate the gamma function.

    Shortcuts taken from:
   http://en.wikipedia.org/wiki/Student%27s_t-distribution#Probability_density_function
*/
studentTCoeff[nu] :=
{
   
   coeff = 1
   if (nu mod 2) == 0  and nu>0   // nu even
   {
      for c = 2 to nu-2 step 2
         coeff = coeff * (c+1)/(c)
      
      coeff = coeff / (2 sqrt[nu])
      return coeff
   }

   if (nu mod 2) == 1  and nu >= 1  // nu odd
   {
      for c = 3 to nu-2 step 2
         coeff = coeff * (c-1)/c

      coeff = coeff / (pi sqrt[nu])
      return coeff
   }

   println["studentTCoeff not implemented for nu=$nu"]
   return undef
}

/** Calculate the Student's T value of a single sampled group.
*/
studentT[sampleMean, populationMean, sampleSD, sampleSize] :=
{
   return (sampleMean - populationMean) / (sampleSD / sqrt[sampleSize])
}

// Samples to examine behavior of functions
//for n=0.01 to .89 step .1
//{
//   [val, iterations, digits] = inverseErf[n, false, true, 15]
//   println["$n\t$val\t$iterations\t$digits"]
//}

//for n=0.2 to 20 step .2
//{
//   [val, iterations, digits] = erfTaylor[n, false, true, 20]
//   println["$n\t$iterations\t$digits"]
//}

"statistics.frink included OK"