Created
March 19, 2024 15:13
-
-
Save fredrik-johansson/0da150e1ddab53649ec2085a468dd753 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "gr.h" | |
#include "arf.h" | |
#include "mpn_extras.h" | |
#include "crt_helpers.h" | |
typedef struct | |
{ | |
slong sign; | |
slong exp; | |
mp_limb_t d[4]; | |
} | |
mpf4_struct; | |
typedef mpf4_struct mpf4_t[1]; | |
typedef mpf4_struct * mpf4_ptr; | |
typedef const mpf4_struct * mpf4_srcptr; | |
int | |
_mpf4_set(mpf4_t res, const mpf4_t x, gr_ctx_t ctx) | |
{ | |
*res = *x; | |
return GR_SUCCESS; | |
} | |
int | |
_mpf4_zero(mpf4_t res, gr_ctx_t ctx) | |
{ | |
res->sign = 0; | |
res->exp = 0; | |
res->d[0] = 0; | |
res->d[1] = 0; | |
res->d[2] = 0; | |
res->d[3] = 0; | |
return GR_SUCCESS; | |
} | |
int | |
_mpf4_one(mpf4_t res, gr_ctx_t ctx) | |
{ | |
res->sign = 1; | |
res->exp = 1; | |
res->d[0] = 0; | |
res->d[1] = 0; | |
res->d[2] = 0; | |
res->d[3] = UWORD(1) << (FLINT_BITS - 1); | |
return GR_SUCCESS; | |
} | |
int | |
_mpf4_neg_one(mpf4_t res, gr_ctx_t ctx) | |
{ | |
res->sign = -1; | |
res->exp = 1; | |
res->d[0] = 0; | |
res->d[1] = 0; | |
res->d[2] = 0; | |
res->d[3] = UWORD(1) << (FLINT_BITS - 1); | |
return GR_SUCCESS; | |
} | |
/* todo: overflow/underflow */ | |
int | |
_mpf4_set_arf(mpf4_t res, const arf_t x, gr_ctx_t ctx) | |
{ | |
if (arf_is_special(x)) | |
{ | |
if (arf_is_zero(x)) | |
{ | |
return _mpf4_zero(res, ctx); | |
} | |
else | |
{ | |
return GR_DOMAIN; | |
} | |
} | |
else | |
{ | |
mp_srcptr xp; | |
mp_size_t xn; | |
ARF_GET_MPN_READONLY(xp, xn, x); | |
res->d[3] = xp[xn - 1]; | |
res->d[2] = (xn >= 2) ? xp[xn - 2] : 0; | |
res->d[1] = (xn >= 3) ? xp[xn - 3] : 0; | |
res->d[0] = (xn >= 4) ? xp[xn - 4] : 0; | |
res->sign = ARF_SGNBIT(x) ? -1 : 1; | |
res->exp = ARF_EXP(x); | |
return GR_SUCCESS; | |
} | |
} | |
int | |
_mpf4_get_arf(arf_t res, const mpf4_t x, gr_ctx_t ctx) | |
{ | |
slong shift; | |
if (x->sign == 0) | |
{ | |
arf_zero(res); | |
} | |
else | |
{ | |
_arf_set_round_mpn(res, &shift, x->d, 4, x->sign < 0, 256, ARF_RND_DOWN); | |
fmpz_set_si(ARF_EXPREF(res), x->exp + shift); | |
} | |
return GR_SUCCESS; | |
} | |
__attribute__((noinline)) int | |
_mpf4_add(mpf4_ptr res, mpf4_srcptr x, mpf4_srcptr y, gr_ctx_t ctx) | |
{ | |
slong xexp, yexp, delta; | |
slong xsign, ysign; | |
if (x->sign == 0) | |
return _mpf4_set(res, y, ctx); | |
if (y->sign == 0) | |
return _mpf4_set(res, x, ctx); | |
xexp = x->exp; | |
yexp = y->exp; | |
if (xexp < yexp) | |
{ | |
FLINT_SWAP(mpf4_srcptr, x, y); | |
FLINT_SWAP(slong, xexp, yexp); | |
} | |
xsign = x->sign; | |
ysign = y->sign; | |
mp_limb_t t0, t1, t2, t3, t4, u0, u1, u2, u3, u4; | |
delta = xexp - yexp; | |
if (xsign == ysign) | |
{ | |
t0 = x->d[0]; | |
t1 = x->d[1]; | |
t2 = x->d[2]; | |
t3 = x->d[3]; | |
u0 = y->d[0]; | |
u1 = y->d[1]; | |
u2 = y->d[2]; | |
u3 = y->d[3]; | |
if (delta < FLINT_BITS) | |
{ | |
if (delta != 0) | |
{ | |
u0 = (u0 >> delta) | (u1 << (FLINT_BITS - delta)); | |
u1 = (u1 >> delta) | (u2 << (FLINT_BITS - delta)); | |
u2 = (u2 >> delta) | (u3 << (FLINT_BITS - delta)); | |
u3 = (u3 >> delta); | |
} | |
} | |
else | |
{ | |
if (delta < 2 * FLINT_BITS) | |
{ | |
if (delta == FLINT_BITS) | |
{ | |
u0 = u1; | |
u1 = u2; | |
u2 = u3; | |
} | |
else | |
{ | |
u0 = (u1 >> delta) | (u2 << (FLINT_BITS - delta)); | |
u1 = (u2 >> delta) | (u3 << (FLINT_BITS - delta)); | |
u2 = (u3 >> delta); | |
} | |
u3 = 0; | |
} | |
else if (delta < 3 * FLINT_BITS) | |
{ | |
if (delta == 2 * FLINT_BITS) | |
{ | |
u0 = u2; | |
u1 = u3; | |
} | |
else | |
{ | |
u0 = (u2 >> delta) | (u3 << (FLINT_BITS - delta)); | |
u1 = (u3 >> delta); | |
} | |
u2 = 0; | |
u3 = 0; | |
} | |
else if (delta < 4 * FLINT_BITS) | |
{ | |
if (delta == 3 * FLINT_BITS) | |
u0 = u3; | |
else | |
u0 = (u3 >> delta); | |
u1 = 0; | |
u2 = 0; | |
u3 = 0; | |
} | |
else | |
{ | |
goto write_noshift; | |
} | |
} | |
add_sssssaaaaaaaaaa(t4, t3, t2, t1, t0, 0, t3, t2, t1, t0, 0, u3, u2, u1, u0); | |
if (t4 == 0) | |
{ | |
write_noshift: | |
res->sign = xsign; | |
res->exp = xexp; | |
res->d[0] = t0; | |
res->d[1] = t1; | |
res->d[2] = t2; | |
res->d[3] = t3; | |
} | |
else | |
{ | |
res->sign = xsign; | |
res->exp = xexp + 1; | |
res->d[0] = (t0 >> 1) | (t1 << (FLINT_BITS - 1)); | |
res->d[1] = (t1 >> 1) | (t2 << (FLINT_BITS - 1)); | |
res->d[2] = (t2 >> 1) | (t3 << (FLINT_BITS - 1)); | |
res->d[3] = (t3 >> 1) | (UWORD(1) << (FLINT_BITS - 1)); | |
} | |
} | |
else | |
{ | |
t0 = 0; | |
t1 = x->d[0]; | |
t2 = x->d[1]; | |
t3 = x->d[2]; | |
t4 = x->d[3]; | |
u0 = 0; | |
u1 = y->d[0]; | |
u2 = y->d[1]; | |
u3 = y->d[2]; | |
u4 = y->d[3]; | |
if (delta == 0) | |
{ | |
int cmp; | |
if (t3 > u3) | |
cmp = 1; | |
else if (t3 < u3) | |
cmp = -1; | |
else | |
/* todo: ensure return is 1 or -1 */ | |
cmp = mpn_cmp(x->d, y->d, 3); | |
if (cmp == 0) | |
return _mpf4_zero(res, ctx); | |
if (cmp > 1) | |
sub_ddddmmmmssss(t3, t2, t1, t0, t3, t2, t1, t0, u3, u2, u1, u0); | |
else | |
sub_ddddmmmmssss(t3, t2, t1, t0, u3, u2, u1, u0, t3, t2, t1, t0); | |
if (t3 < (UWORD(1) << (FLINT_BITS - 2))) | |
{ | |
/* todo */ | |
flint_abort(); | |
} | |
res->sign = xsign; | |
res->exp = xexp + 1; | |
res->d[0] = (t0 >> 1) | (t1 << (FLINT_BITS - 1)); | |
res->d[1] = (t1 >> 1) | (t2 << (FLINT_BITS - 1)); | |
res->d[2] = (t2 >> 1) | (t3 << (FLINT_BITS - 1)); | |
res->d[3] = (t3 >> 1) | (UWORD(1) << (FLINT_BITS - 1)); | |
} | |
else if (delta < FLINT_BITS) | |
{ | |
u0 = (u1 << (FLINT_BITS - delta)); | |
u1 = (u1 >> delta) | (u2 << (FLINT_BITS - delta)); | |
u2 = (u2 >> delta) | (u3 << (FLINT_BITS - delta)); | |
u3 = (u3 >> delta) | (u4 << (FLINT_BITS - delta)); | |
u4 = (u4 >> delta); | |
} | |
else | |
{ | |
flint_abort(); | |
} | |
sub_dddddmmmmmsssss(t4, t3, t2, t1, t0, t4, t3, t2, t1, t0, u4, u3, u2, u1, u0); | |
if (t4 == 0) | |
{ | |
/* todo */ | |
flint_abort(); | |
} | |
if (t4 >> (FLINT_BITS - 1)) | |
{ | |
res->sign = xsign; | |
res->exp = xexp; | |
res->d[0] = t1; | |
res->d[1] = t2; | |
res->d[2] = t3; | |
res->d[3] = t4; | |
} | |
else | |
{ | |
slong shift = flint_clz(t4); | |
res->sign = xsign; | |
res->exp = xexp - shift; | |
res->d[0] = (t1 << shift) | (t0 >> (FLINT_BITS - shift)); | |
res->d[1] = (t2 << shift) | (t1 >> (FLINT_BITS - shift)); | |
res->d[2] = (t3 << shift) | (t2 >> (FLINT_BITS - shift)); | |
res->d[3] = (t4 << shift) | (t3 >> (FLINT_BITS - shift)); | |
} | |
} | |
return GR_SUCCESS; | |
} | |
mp_limb_pair_t flint_mpn_mulhigh_normalised_4(mp_ptr, mp_srcptr, mp_srcptr); | |
__attribute__((noinline)) int | |
_mpf4_mul(mpf4_ptr res, mpf4_srcptr x, mpf4_srcptr y, gr_ctx_t ctx) | |
{ | |
slong xexp, yexp; | |
slong xsign, ysign; | |
mp_limb_pair_t ret; | |
xsign = x->sign; | |
ysign = y->sign; | |
if (xsign == 0 || ysign == 0) | |
return _mpf4_zero(res, ctx); | |
xexp = x->exp; | |
yexp = y->exp; | |
ret = flint_mpn_mulhigh_normalised(res->d, x->d, y->d, 4); | |
res->exp = xexp + yexp - (slong) ret.m2; | |
res->sign = x->sign * y->sign; | |
return GR_SUCCESS; | |
} | |
#include "profiler.h" | |
int main() | |
{ | |
arf_t a, b, c, d; | |
mpf4_t x, y, z; | |
arf_init(a); | |
arf_init(b); | |
arf_init(c); | |
arf_init(d); | |
arf_sqrt_ui(a, 2, 256, ARF_RND_DOWN); | |
arf_sqrt_ui(b, 3, 256, ARF_RND_DOWN); | |
_mpf4_set_arf(x, a, NULL); | |
_mpf4_set_arf(y, b, NULL); | |
slong i; | |
TIMEIT_START | |
arf_set(c, a); | |
for (i = 0; i < 1000; i++) | |
arf_add(c, c, b, 256, ARF_RND_DOWN); | |
TIMEIT_STOP | |
TIMEIT_START | |
_mpf4_set(z, x, NULL); | |
for (i = 0; i < 1000; i++) | |
_mpf4_add(z, z, y, NULL); | |
TIMEIT_STOP | |
_mpf4_get_arf(d, z, NULL); | |
arf_printd(c, 76); flint_printf("\n"); | |
arf_printd(d, 76); flint_printf("\n"); | |
_mpf4_set_arf(x, a, NULL); | |
_mpf4_set_arf(y, b, NULL); | |
TIMEIT_START | |
arf_set(c, a); | |
for (i = 0; i < 1000; i++) | |
arf_mul(c, c, b, 256, ARF_RND_DOWN); | |
TIMEIT_STOP | |
TIMEIT_START | |
_mpf4_set(z, x, NULL); | |
for (i = 0; i < 1000; i++) | |
_mpf4_mul(z, z, y, NULL); | |
TIMEIT_STOP | |
_mpf4_get_arf(d, z, NULL); | |
arf_printd(c, 76); flint_printf("\n"); | |
arf_printd(d, 76); flint_printf("\n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment