Created
July 6, 2015 15:25
-
-
Save kgori/24eae8d5fe46a110e260 to your computer and use it in GitHub Desktop.
Cython wrapper of PAML discrete gamma rates function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include "discrete_gamma.h" | |
// PAML code for discrete gamma | |
// Forward decs | |
double QuantileChi2(double prob, double v); | |
double LnGammaFunction(double alpha); | |
double IncompleteGamma(double x, double alpha, double ln_gamma_alpha); | |
#define QuantileGamma(prob, alpha, beta) \ | |
QuantileChi2(prob, 2.0 * (alpha)) / (2.0 * (beta)) | |
long factorial(int n) { | |
long f = 1, i; | |
if (n > 11) | |
return -1; | |
for (i = 2; i <= (long)n; i++) | |
f *= i; | |
return (f); | |
} | |
/* functions concerning the CDF and percentage points of the gamma and | |
Chi2 distribution | |
*/ | |
double QuantileNormal(double prob) { | |
/* returns z so that Prob{x<z}=prob where x ~ N(0,1) and | |
(1e-12)<prob<1-(1e-12) | |
returns (-9999) if in error | |
Odeh RE & Evans JO (1974) The percentage points of the normal | |
distribution. | |
Applied Statistics 22: 96-97 (AS70) | |
Newer methods: | |
Wichura MJ (1988) Algorithm AS 241: the percentage points of the | |
normal distribution. 37: 477-484. | |
Beasley JD & Springer SG (1977). Algorithm AS 111: the percentage | |
points of the normal distribution. 26: 118-121. | |
*/ | |
double a0 = -.322232431088, a1 = -1, a2 = -.342242088547, | |
a3 = -.0204231210245; | |
double a4 = -.453642210148e-4, b0 = .0993484626060, b1 = .588581570495; | |
double b2 = .531103462366, b3 = .103537752850, b4 = .0038560700634; | |
double y, z = 0, p = prob, p1; | |
p1 = (p < 0.5 ? p : 1 - p); | |
if (p1 < 1e-20) | |
z = 999; | |
else { | |
y = sqrt(log(1 / (p1 * p1))); | |
z = y + | |
((((y * a4 + a3) * y + a2) * y + a1) * y + a0) / | |
((((y * b4 + b3) * y + b2) * y + b1) * y + b0); | |
} | |
return (p < 0.5 ? -z : z); | |
} | |
double LnGammaFunction(double alpha) { | |
/* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places. | |
Stirling's formula is used for the central polynomial part of the | |
procedure. | |
Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. | |
Communications of the Association for Computing Machinery, 9:684 | |
*/ | |
double x = alpha, f = 0, z; | |
if (x < 7) { | |
f = 1; | |
z = x - 1; | |
while (++z < 7) | |
f *= z; | |
x = z; | |
f = -log(f); | |
} | |
z = 1 / (x * x); | |
return f + (x - 0.5) * log(x) - x + .918938533204673 + | |
(((-.000595238095238 * z + .000793650793651) * z - | |
.002777777777778) * | |
z + | |
.083333333333333) / | |
x; | |
} | |
double LnGamma(double x) { | |
/* returns ln(gamma(x)) for x>0, accurate to 10 decimal places. | |
Stirling's formula is used for the central polynomial part of the | |
procedure. | |
Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. | |
Communications of the Association for Computing Machinery, 9:684 | |
*/ | |
double f = 0, fneg = 0, z, lng; | |
int nx = (int)x; | |
if ((double)nx == x && nx >= 0 && nx <= 11) | |
lng = log((double)factorial(nx - 1)); | |
else { | |
if (x <= 0) { | |
printf("LnGamma(%.6f) not implemented", x); | |
if ((int)x - x == 0) { | |
puts("lnGamma undefined"); | |
return (-1); | |
} | |
for (fneg = 1; x < 0; x++) | |
fneg /= x; | |
if (fneg < 0) | |
return (-1); | |
fneg = log(fneg); | |
} | |
if (x < 7) { | |
f = 1; | |
z = x - 1; | |
while (++z < 7) | |
f *= z; | |
x = z; | |
f = -log(f); | |
} | |
z = 1 / (x * x); | |
lng = fneg + f + (x - 0.5) * log(x) - x + .918938533204673 + | |
(((-.000595238095238 * z + .000793650793651) * z - | |
.002777777777778) * | |
z + | |
.083333333333333) / | |
x; | |
} | |
return lng; | |
} | |
double QuantileChi2(double prob, double v) { | |
/* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v | |
returns -1 if in error. 0.000002<prob<0.999998 | |
RATNEST FORTRAN by | |
Best DJ & Roberts DE (1975) The percentage Quantiles of the | |
Chi2 distribution. Applied Statistics 24: 385-388. (AS91) | |
Converted into C by Ziheng Yang, Oct. 1993. | |
*/ | |
double e = .5e-6, aa = .6931471805, p = prob, g; | |
double xx, c, ch, a = 0, q = 0, p1 = 0, p2 = 0, t = 0, x = 0, b = 0, s1, s2, | |
s3, s4, s5, s6; | |
if (p < .000002 || p > .999998 || v <= 0) | |
return (-1); | |
g = LnGammaFunction(v / 2); | |
xx = v / 2; | |
c = xx - 1; | |
if (v >= -1.24 * log(p)) | |
goto l1; | |
ch = pow((p * xx * exp(g + xx * aa)), 1 / xx); | |
if (ch - e < 0) | |
return (ch); | |
goto l4; | |
l1: | |
if (v > .32) | |
goto l3; | |
ch = 0.4; | |
a = log(1 - p); | |
l2: | |
q = ch; | |
p1 = 1 + ch * (4.67 + ch); | |
p2 = ch * (6.73 + ch * (6.66 + ch)); | |
t = -0.5 + (4.67 + 2 * ch) / p1 - (6.73 + ch * (13.32 + 3 * ch)) / p2; | |
ch -= (1 - exp(a + g + .5 * ch + c * aa) * p2 / p1) / t; | |
if (fabs(q / ch - 1) - .01 <= 0) | |
goto l4; | |
else | |
goto l2; | |
l3: | |
x = QuantileNormal(p); | |
p1 = 0.222222 / v; | |
ch = v * pow((x * sqrt(p1) + 1 - p1), 3.0); | |
if (ch > 2.2 * v + 6) | |
ch = -2 * (log(1 - p) - c * log(.5 * ch) + g); | |
l4: | |
q = ch; | |
p1 = .5 * ch; | |
if ((t = IncompleteGamma(p1, xx, g)) < 0) { | |
printf("\nerr IncompleteGamma"); | |
return (-1); | |
} | |
p2 = p - t; | |
t = p2 * exp(xx * aa + g + p1 - c * log(ch)); | |
b = t / ch; | |
a = 0.5 * t - b * c; | |
s1 = (210 + a * (140 + a * (105 + a * (84 + a * (70 + 60 * a))))) / 420; | |
s2 = (420 + a * (735 + a * (966 + a * (1141 + 1278 * a)))) / 2520; | |
s3 = (210 + a * (462 + a * (707 + 932 * a))) / 2520; | |
s4 = (252 + a * (672 + 1182 * a) + c * (294 + a * (889 + 1740 * a))) / 5040; | |
s5 = (84 + 264 * a + c * (175 + 606 * a)) / 2520; | |
s6 = (120 + c * (346 + 127 * c)) / 5040; | |
ch += | |
t * (1 + 0.5 * t * s1 - | |
b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6)))))); | |
if (fabs(q / ch - 1) > e) | |
goto l4; | |
return (ch); | |
} | |
double IncompleteGamma(double x, double alpha, double ln_gamma_alpha) { | |
/* returns the incomplete gamma ratio I(x,alpha) where x is the upper | |
limit of the integration and alpha is the shape parameter. | |
returns (-1) if in error | |
ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant. | |
(1) series expansion if (alpha>x || x<=1) | |
(2) continued fraction otherwise | |
RATNEST FORTRAN by | |
Bhattacharjee GP (1970) The incomplete gamma integral. Applied | |
Statistics, | |
19: 285-287 (AS32) | |
*/ | |
int i; | |
double p = alpha, g = ln_gamma_alpha; | |
double accurate = 1e-8, overflow = 1e30; | |
double factor, gin = 0, rn = 0, a = 0, b = 0, an = 0, dif = 0, term = 0, | |
pn[6]; | |
if (x == 0) | |
return (0); | |
if (x < 0 || p <= 0) | |
return (-1); | |
factor = exp(p * log(x) - x - g); | |
if (x > 1 && x >= p) | |
goto l30; | |
/* (1) series expansion */ | |
gin = 1; | |
term = 1; | |
rn = p; | |
l20: | |
rn++; | |
term *= x / rn; | |
gin += term; | |
if (term > accurate) | |
goto l20; | |
gin *= factor / p; | |
goto l50; | |
l30: | |
/* (2) continued fraction */ | |
a = 1 - p; | |
b = a + x + 1; | |
term = 0; | |
pn[0] = 1; | |
pn[1] = x; | |
pn[2] = x + 1; | |
pn[3] = x * b; | |
gin = pn[2] / pn[3]; | |
l32: | |
a++; | |
b += 2; | |
term++; | |
an = a * term; | |
for (i = 0; i < 2; i++) | |
pn[i + 4] = b * pn[i + 2] - an * pn[i]; | |
if (pn[5] == 0) | |
goto l35; | |
rn = pn[4] / pn[5]; | |
dif = fabs(gin - rn); | |
if (dif > accurate) | |
goto l34; | |
if (dif <= accurate * rn) | |
goto l42; | |
l34: | |
gin = rn; | |
l35: | |
for (i = 0; i < 4; i++) | |
pn[i] = pn[i + 2]; | |
if (fabs(pn[4]) < overflow) | |
goto l32; | |
for (i = 0; i < 4; i++) | |
pn[i] /= overflow; | |
goto l32; | |
l42: | |
gin = 1 - factor * gin; | |
l50: | |
return (gin); | |
} | |
int DiscreteGamma(double freqK[], double rK[], double alpha, double beta, int K, | |
int UseMedian) { | |
/* discretization of G(alpha, beta) with equal proportions in each category. | |
*/ | |
int i; | |
double t, mean = alpha / beta, lnga1; | |
if (UseMedian) { /* median */ | |
for (i = 0; i < K; i++) { | |
rK[i] = QuantileGamma((i * 2. + 1) / (2. * K), alpha, beta); | |
} | |
for (i = 0, t = 0; i < K; i++) { | |
t += rK[i]; | |
} | |
for (i = 0; i < K; i++) { | |
rK[i] *= mean * K / t; /* rescale so that the mean is alpha/beta. */ | |
} | |
} else { /* mean */ | |
lnga1 = LnGamma(alpha + 1); | |
for (i = 0; i < K - 1; i++) { /* cutting points, Eq. 9 */ | |
freqK[i] = QuantileGamma((i + 1.0) / K, alpha, beta); | |
} | |
for (i = 0; i < K - 1; i++) { /* Eq. 10 */ | |
freqK[i] = IncompleteGamma(freqK[i] * beta, alpha + 1, lnga1); | |
} | |
rK[0] = freqK[0] * mean * K; | |
for (i = 1; i < K - 1; i++) { | |
rK[i] = (freqK[i] - freqK[i - 1]) * mean * K; | |
} | |
rK[K - 1] = (1 - freqK[K - 2]) * mean * K; | |
} | |
for (i = 0; i < K; i++) | |
freqK[i] = 1.0 / K; | |
return (0); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int DiscreteGamma(double freqK[], double rK[], double alpha, double beta, int K, | |
int UseMedian); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
from setuptools import setup, Extension | |
except ImportError: | |
from distutils.core import setup, Extension | |
from Cython.Distutils import build_ext | |
import numpy | |
ext = Extension("utils", | |
sources = ['utils.pyx', | |
'discrete_gamma.c'], | |
include_dirs = [numpy.get_include()], | |
) | |
setup(cmdclass={'build_ext':build_ext}, | |
name="utilities", | |
author='Kevin Gori', | |
author_email='[email protected]', | |
description='Wrapper of utility c functions', | |
url='', | |
version="0.0.1", | |
ext_modules = [ext], | |
install_requires = ['cython'], | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
cimport numpy as np | |
__all__ = ['discrete_gamma'] | |
cdef extern from "discrete_gamma.h": | |
int DiscreteGamma(double* freqK, double* rK, double alpha, double beta, int K, int UseMedian) | |
def _discrete_gamma(np.ndarray[np.double_t,ndim=1] freqK, np.ndarray[np.double_t,ndim=1] rK, alpha, beta, K, UseMedian): | |
return DiscreteGamma(<double*>freqK.data, <double*>rK.data, alpha, beta, K, UseMedian) | |
def discrete_gamma(alpha, ncat, median_rates=False): | |
""" | |
Generates rates for discrete gamma distribution, | |
for `ncat` categories. By default calculates mean rates, | |
can also calculate median rates by setting median_rates=True. | |
Phylogenetic context, so assumes that gamma parameters | |
alpha == beta, so that expectation of gamma dist. is 1. | |
C source code taken from PAML. | |
Usage: | |
rates = discrete_gamma(0.5, 5) # Mean rates (see Fig 4.9, p.118, Ziheng's book on lizards) | |
>>> array([ 0.02121238, 0.15548577, 0.46708288, 1.10711735, 3.24910162]) | |
""" | |
weights = np.zeros(ncat, dtype=np.double) | |
rates = np.zeros(ncat, dtype=np.double) | |
_ = _discrete_gamma(weights, rates, alpha, alpha, ncat, <int>median_rates) | |
return rates |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment