Created
March 7, 2024 19:54
-
-
Save fredrik-johansson/c1ab3ce215e1cf80115c422a103cf31f to your computer and use it in GitHub Desktop.
Taylor experiment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h> | |
#include "longlong.h" | |
#include "ulong_extras.h" | |
#include "mpn_extras.h" | |
#include "arf.h" | |
#include "arb.h" | |
#include "profiler.h" | |
/* util: print fixed-point number given by n limbs where fracn limbs are fractional. */ | |
void | |
flint_mpn_print_fixed(mp_srcptr x, mp_size_t n, mp_size_t fracn) | |
{ | |
if (n == 0) | |
{ | |
flint_printf("0"); | |
} | |
else | |
{ | |
arf_t y; | |
fmpz_t t; | |
fmpz_init(t); | |
slong d, fracd, dmag; | |
arf_init(y); | |
fmpz_init(t); | |
fmpz_set_ui_array(t, x, n); | |
arf_set_fmpz(y, t); | |
arf_mul_2exp_si(y, y, -fracn * FLINT_BITS); | |
dmag = log10(arf_get_d(y, ARF_RND_DOWN)); | |
fracd = fracn * (FLINT_BITS * 0.3010299956639812); | |
d = FLINT_MAX(1, dmag + fracd); | |
arf_printd(y, d); | |
fmpz_clear(t); | |
arf_clear(y); | |
} | |
flint_printf("\n"); | |
} | |
/* c_n = (20! / n!) */ | |
ulong cs[] = { | |
UWORD(2432902008176640000), | |
UWORD(2432902008176640000), | |
UWORD(1216451004088320000), | |
UWORD(405483668029440000), | |
UWORD(101370917007360000), | |
UWORD(20274183401472000), | |
UWORD(3379030566912000), | |
UWORD(482718652416000), | |
UWORD(60339831552000), | |
UWORD(6704425728000), | |
UWORD(670442572800), | |
UWORD(60949324800), | |
UWORD(5079110400), | |
UWORD(390700800), | |
UWORD(27907200), | |
UWORD(1860480), | |
UWORD(116280), | |
UWORD(6840), | |
UWORD(380), | |
UWORD(20), | |
UWORD(1), | |
}; | |
mp_limb_t __gmpn_preinv_divrem_1(mp_ptr qp, mp_size_t fn, mp_srcptr np, mp_size_t nn, mp_limb_t d, mp_limb_t dinv, int cnt); | |
/* Single-limb inverse of 20! */ | |
ulong c0_preinv1; | |
int c0_preinv1_norm; | |
/* Multi-limb inverse of 20! */ | |
mp_limb_t c0inv[100]; | |
void | |
precompute() | |
{ | |
c0_preinv1 = n_preinvert_limb(cs[0]); | |
c0_preinv1_norm = FLINT_BITS - FLINT_BIT_COUNT(cs[0]); | |
/* 1/cs[0] */ | |
flint_mpn_zero(c0inv, 11); | |
c0inv[11] = 1; | |
mpn_divrem_1(c0inv, 0, c0inv, 12, cs[0]); | |
} | |
/* a0 + x (a1 + x (a2 + x (a3 + x (a4 + x (a5 + x (a6 + x (a7 + x (a8 + x a9)))))))) */ | |
void | |
mpn_exp_series_10_horner(mp_ptr res, mp_srcptr c, mp_srcptr x) | |
{ | |
mp_limb_t t[10]; | |
mp_limb_t s[10]; | |
s[1] = c[9]; | |
s[0] = 0; | |
flint_mpn_mulhigh_n(t, s, x + 9 - 2, 2); | |
t[2] = c[8]; | |
flint_mpn_mulhigh_n(s, t, x + 9 - 3, 3); | |
s[3] = c[7]; | |
flint_mpn_mulhigh_n(t, s, x + 9 - 4, 4); | |
t[4] = c[6]; | |
flint_mpn_mulhigh_n(s, t, x + 9 - 5, 5); | |
s[5] = c[5]; | |
flint_mpn_mulhigh_n(t, s, x + 9 - 6, 6); | |
t[6] = c[4]; | |
flint_mpn_mulhigh_n(s, t, x + 9 - 7, 7); | |
s[7] = c[3]; | |
flint_mpn_mulhigh_n(t, s, x + 9 - 8, 8); | |
t[8] = c[2]; | |
flint_mpn_mulhigh_n(s, t, x + 9 - 9, 9); | |
s[9] = c[1]; | |
flint_mpn_mulhigh_n(res + 1, s + 1, x, 9); | |
res[10] = c[0]; | |
} | |
/* (a0 + a1 x + a2 x^2) + x^3 * ((a3 + a4 x + a5 x^2) + x^3*((a6 + a7 x + a8 x^2) + a9 x^3)) | |
todo: consider | |
a0 + (a1 x + a2 x^2) + a3 x^3 + x^3 * ((a4 x + a5 x^2) + a6 x^3 + x^3*((a7 x + a8 x^2) + a9 x^3)) | |
to reduce the precision of the muls | |
*/ | |
void | |
mpn_exp_series_10_rs3b(mp_ptr res, mp_srcptr c, mp_srcptr x) | |
{ | |
mp_limb_t x2[8]; | |
mp_limb_t x3p[8]; | |
mp_limb_t s[11]; | |
mp_limb_t t[11]; | |
/* zero-pad so that we can fudge the final nx(n-1) mul as an nxn mul */ | |
mp_ptr x3 = x3p + 1; | |
x3p[0] = 0; | |
/* x^2 with 8 limbs of precision */ | |
flint_mpn_sqrhigh(x2, x + 1, 8); | |
/* x^3 with 7 limbs of precision */ | |
flint_mpn_mulhigh_n(x3, x + 2, x2 + 1, 7); | |
umul_ppmm(t[1], t[0], x3[6], c[9]); | |
t[2] = mpn_addmul_1(t, x2 + 8 - 2, 2, c[8]); | |
t[3] = mpn_addmul_1(t, x + 9 - 3, 3, c[7]); | |
t[4] = c[6]; | |
flint_mpn_mulhigh_n(s, t, x3 + 7 - 5, 5); | |
s[5] = mpn_addmul_1(s, x2 + 8 - 5, 5, c[5]); | |
s[6] = mpn_addmul_1(s, x + 9 - 6, 6, c[4]); | |
s[7] = c[3]; | |
flint_mpn_mulhigh_n(res, s, x3 - 1, 8); | |
res[8] = mpn_addmul_1(res, x2, 8, c[2]); | |
__gmpn_preinv_divrem_1(res, 0, res, 9, c[0], c0_preinv1, c0_preinv1_norm); | |
res[9] = mpn_add_n(res, res, x, 9); | |
res[10] = 1; | |
} | |
void | |
mpn_exp_series_10_rs3c(mp_ptr res, mp_srcptr c, mp_srcptr x) | |
{ | |
mp_limb_t x2[8]; | |
mp_limb_t x3p[8]; | |
mp_limb_t s[11]; | |
mp_limb_t t[11]; | |
/* zero-pad so that we can fudge the final nx(n-1) mul as an nxn mul */ | |
mp_ptr x3 = x3p + 1; | |
x3p[0] = 0; | |
/* x^2 with 8 limbs of precision */ | |
flint_mpn_sqrhigh(x2, x + 1, 8); | |
/* x^3 with 7 limbs of precision */ | |
flint_mpn_mulhigh_n(x3, x + 2, x2 + 1, 7); | |
umul_ppmm(t[1], t[0], x3[6], c[9]); | |
t[2] = mpn_addmul_1(t, x2 + 8 - 2, 2, c[8]); | |
t[3] = mpn_addmul_1(t, x + 9 - 3, 3, c[7]); | |
t[4] = c[6]; | |
flint_mpn_mulhigh_n(s, t, x3 + 7 - 5, 5); | |
s[5] = mpn_addmul_1(s, x2 + 8 - 5, 5, c[5]); | |
s[6] = mpn_addmul_1(s, x + 9 - 6, 6, c[4]); | |
s[7] = c[3]; | |
flint_mpn_mulhigh_n(t, s, x3 - 1, 8); | |
t[8] = mpn_addmul_1(t, x2, 8, c[2]); | |
flint_mpn_mulhigh_n(res, t, c0inv + 2, 9); | |
res[9] = mpn_add_n(res, res, x, 9); | |
res[10] = 1; | |
} | |
void | |
mpn_exp_series_10_rs3d(mp_ptr res, mp_srcptr c, mp_srcptr x) | |
{ | |
mp_limb_t x2[8]; | |
mp_limb_t x3d[8]; | |
mp_limb_t s[11]; | |
mp_limb_t t[11]; | |
/* zero-pad x^3 so that we can fudge the final the 8x7 horner mul by x^3 | |
as an 8x8 mulhigh_n */ | |
x3d[0] = 0; | |
mp_ptr x3 = x3d + 1; | |
/* x^2 with 8 limbs of precision */ | |
flint_mpn_sqrhigh(x2, x + 1, 8); | |
/* x^3 with 7 limbs of precision */ | |
flint_mpn_mulhigh_n(x3, x + 2, x2 + 1, 7); | |
umul_ppmm(t[1], t[0], x3[6], c[9]); /* t[1] = mpn_addmul_1(t, x3 + 8 - 1, 1, c[9]); */ | |
t[2] = mpn_addmul_1(t, x2 + 8 - 2, 2, c[8]); | |
t[3] = mpn_addmul_1(t, x + 9 - 3, 3, c[7]); | |
t[4] = c[6]; | |
flint_mpn_mulhigh_n(s, t, x3 + 7 - 5, 5); | |
s[5] = mpn_addmul_1(s, x2 + 8 - 5, 5, c[5]); | |
s[6] = mpn_addmul_1(s, x + 9 - 6, 6, c[4]); | |
s[7] = c[3]; | |
flint_mpn_mulhigh_n(t, s, x3 - 1, 8); | |
/* divide by factorial */ | |
flint_mpn_mulhigh_n(res, t, c0inv + 3, 8); | |
/* + x^2 / 2 (9 limbs) */ | |
mpn_rshift(t, x2, 8, 1); | |
res[8] = mpn_add_n(res, res, t, 8); | |
/* + x (10 limbs) */ | |
res[9] = mpn_add_n(res, res, x, 9); | |
/* + 1 */ | |
res[10] = 1; | |
} | |
/* (a0 + a1 x + a2 x^2 + a3 x^3) + x^4 * ((a4 + a5 x + a6 x^2 + a7 x^3) + x^4* (a8 + a9 x) */ | |
void | |
mpn_exp_series_10_rs4(mp_ptr res, mp_srcptr c, mp_srcptr x) | |
{ | |
mp_limb_t x2[8]; | |
mp_limb_t x3[8]; | |
mp_limb_t x4p[8]; | |
mp_limb_t s[11]; | |
mp_limb_t t[11]; | |
/* zero-pad so that we can fudge the final nx(n-1) mul as an nxn mul */ | |
mp_ptr x4 = x4p + 1; | |
x4p[0] = 0; | |
flint_mpn_sqrhigh(x2, x + 1, 8); | |
flint_mpn_mulhigh_n(x3, x + 2, x2 + 1, 7); | |
flint_mpn_sqrhigh(x4, x2 + 2, 6); | |
umul_ppmm(t[1], t[0], x[8], c[9]); /* t[1] = mpn_mul_1(t, x + 9 - 1, 1, c[9]); */ | |
t[2] = c[8]; | |
flint_mpn_mulhigh_n(s, t, x4 + 6 - 3, 3); | |
s[3] = mpn_addmul_1(s, x3 + 7 - 3, 3, c[7]); | |
s[4] = mpn_addmul_1(s, x2 + 8 - 4, 4, c[6]); | |
s[5] = mpn_addmul_1(s, x + 9 - 5, 5, c[5]); | |
s[6] = c[4]; | |
flint_mpn_mulhigh_n(t, s, x4 + 6 - 7, 7); | |
t[7] = mpn_addmul_1(t, x3, 7, c[3]); | |
/* divide by factorial */ | |
flint_mpn_mulhigh_n(res, t, c0inv + 3, 8); | |
/* + x^2 / 2 (9 limbs) */ | |
mpn_rshift(t, x2, 8, 1); | |
res[8] = mpn_add_n(res, res, t, 8); | |
/* + x (10 limbs) */ | |
res[9] = mpn_add_n(res, res, x, 9); | |
/* + 1 */ | |
res[10] = 1; | |
} | |
int main() | |
{ | |
mp_limb_t X[400], Y[400], Z[400]; | |
precompute(); | |
flint_mpn_zero(X, 100); | |
flint_mpn_zero(Y, 100); | |
#if 0 | |
ulong dinv = n_preinvert_limb(123987); | |
int cnt = 64 - FLINT_BIT_COUNT(123987); | |
slong n; | |
for (n = 1; n < 20; n++) | |
{ | |
flint_printf("%wd\n", n); | |
TIMEIT_START | |
flint_mpn_mulhigh_n(Z, X, Y, n); | |
TIMEIT_STOP | |
TIMEIT_START | |
mpn_divrem_1(X, 0, X, n, 123987); | |
TIMEIT_STOP | |
TIMEIT_START | |
__gmpn_preinv_divrem_1(X, 0, X, n, 123987, dinv, cnt); | |
TIMEIT_STOP | |
} | |
#endif | |
/* x = 1/10^20 < 2^-64 */ | |
X[10] = 1; | |
mpn_divrem_1(X, 0, X, 11, n_pow(10, 10)); | |
mpn_divrem_1(X, 0, X, 10, n_pow(10, 10)); | |
arb_t y, x; | |
arb_init(x); | |
arb_init(y); | |
arb_set_str(x, "1e-20", 640); | |
mag_zero(arb_radref(x)); | |
flint_printf("arb_exp\n"); | |
TIMEIT_START | |
arb_exp(y, x, 640); | |
TIMEIT_STOP | |
arb_printn(y, 0.3010299956639812 * 640, ARB_STR_NO_RADIUS); flint_printf("\n"); | |
flint_printf("\nmpn_exp_series_10_rs3d (mulhigh):\n"); | |
TIMEIT_START | |
mpn_exp_series_10_rs3d(Y, cs, X); | |
TIMEIT_STOP | |
flint_mpn_print_fixed(Y, 11, 10); | |
flint_printf("\n_arb_exp_taylor_rs:\n"); | |
TIMEIT_START | |
_arb_exp_taylor_rs(Z, Y + 50, X, 10, 10); | |
TIMEIT_STOP | |
flint_mpn_print_fixed(Z, 11, 10); | |
flint_printf("\nmpn_exp_series_10_horner (divrem_1):\n"); | |
TIMEIT_START | |
mpn_exp_series_10_horner(Y, cs, X); | |
mpn_divrem_1(Y, 0, Y, 11, cs[0]); | |
TIMEIT_STOP | |
flint_mpn_print_fixed(Y, 11, 10); | |
#if 0 | |
flint_printf("\nmpn_exp_series_10_horner (preinv_divrem_1):\n"); | |
TIMEIT_START | |
mpn_exp_series_10_horner(Y, cs, X); | |
__gmpn_preinv_divrem_1(Y, 0, Y, 11, cs[0], c0_preinv1, c0_preinv1_norm); | |
TIMEIT_STOP | |
flint_mpn_print_fixed(Y, 11, 10); | |
flint_printf("\nmpn_exp_series_10_horner (mulhigh):\n"); | |
TIMEIT_START | |
mpn_exp_series_10_horner(Z, cs, X); | |
flint_mpn_mulhigh_n(Y, Z, c0inv, 11); | |
TIMEIT_STOP | |
flint_mpn_print_fixed(Y, 11, 10); | |
#endif | |
flint_printf("\nmpn_exp_series_10_rs3d:\n"); | |
TIMEIT_START | |
mpn_exp_series_10_rs3c(Y, cs, X); | |
TIMEIT_STOP | |
flint_mpn_print_fixed(Y, 11, 10); | |
flint_printf("\nmpn_exp_series_10_rs4:\n"); | |
TIMEIT_START | |
mpn_exp_series_10_rs4(Y, cs, X); | |
TIMEIT_STOP | |
flint_mpn_print_fixed(Y, 11, 10); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment