Last active
April 3, 2022 11:37
-
-
Save TekuConcept/8bb0b328fa89f1de3a5802356dd1d360 to your computer and use it in GitHub Desktop.
Fixed-Point Trigonometric Functions & Other Fun Stuff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Created by TekuConcept on September 23, 2020 | |
*/ | |
#include <iostream> | |
#include <iomanip> | |
#include <cmath> | |
#include <cstdint> | |
// Q10(256) = 262144 := BPI (proportional representation of 2pi as a power of 2) | |
// Q10(M_PI) = 3217 (Actual Q10 value of pi to compare; 3.14159... * (1 << 10)) | |
#define Q10_BPI_4 0x08000 // pi/4 | |
#define Q10_BPI_2 0x10000 // pi/2 | |
#define Q10_3BPI_4 0x18000 // 3pi/4 | |
#define Q10_BPI 0x20000 // pi | |
#define Q10_2BPI 0x40000 // 2pi | |
/*********************************************************************************\ | |
* Fixed-Point Sqrt * | |
* Input: x in Q10 * | |
* Output: y in Q10 * | |
* Note: precision represents actual precision left-shift one * | |
* * | |
* std::sqrt(n) = fp_sqrt(n << 10) / (float)(1 << 10) // +/-0.0005 * | |
* * | |
* Based on: https://github.com/chmike/fpsqrt * | |
* and: "Fixed Point Square Root" from http://citeseerx.ist.psu.edu/ * | |
\*********************************************************************************/ | |
int32_t fp_sqrt(int32_t x, int8_t s precision = 20) | |
{ | |
uint32_t testDiv, root, count, rem; | |
rem = x; | |
root = 0; | |
count = 0x10 << precision; | |
do { | |
testDiv = root + count; | |
if (rem >= testDiv) { | |
rem -= testDiv; | |
root = testDiv + count; | |
} | |
rem <<= 1; | |
count >>= 1; | |
} while (count > 0x40); | |
return root >> 8; | |
} | |
inline int32_t fp_invsqrt(int32_t x) { return ((0x400 << 10) / fp_sqrt(x)); } // not tested | |
/*********************************************************************************\ | |
* Fixed-Point Log2 * | |
* Input: x in Q10 * | |
* Output: y in Q10 * | |
* * | |
* std::sqrt(n) = fp_sqrt(n << 10) / (float)(1 << 10) // +/-0.0005 * | |
* * | |
* Based on: https://github.com/dmoulding/log2fix * | |
* and: https://stackoverflow.com/questions/4657468 * | |
\*********************************************************************************/ | |
int32_t fp_log2(uint32_t x, int8_t precision = 10) | |
{ | |
int32_t b = 1 << (precision - 1); | |
int32_t y = 0, i; | |
uint64_t z = x; | |
if (x == 0) return 0xFFFFFFFF; | |
while (x < (1 << precision)) { | |
x <<= 1; | |
y -= (1 << precision); | |
} | |
while (x >= (2 << precision)) { | |
x >>= 1; | |
y += (1 << precision); | |
} | |
for (i = 0; i < precision; i++) { | |
z = z * z >> precision; | |
if (z >= 2 << (uint64_t)precision) { | |
z >>= 1; | |
y += b; | |
} | |
b >>= 1; | |
} | |
return y; | |
} | |
inline int32_t fp_log(int32_t x) { return (fp_log2(x) * (uint64_t)0x268826a1) >> 31; }; | |
inline int32_t fp_ln(int32_t x) { return (fp_log2(x) * (uint64_t)0x58b90bfc) >> 31; }; | |
/*********************************************************************************\ | |
* Fixed-Point Sine * | |
* Input: x in Q10 fp-radians [rad * (256 << 10) / (2pi)] * | |
* Output: y in Q10 * | |
* * | |
* std::sin(theta) = fp_sin(theta * (256<<10) / (2*M_PI)) +/-0.012831 * | |
* * | |
* fp_cos, fp_tan, fp_sec, fp_csc, fp_cot can all be derived from fp_sin * | |
* Based on: https://www.coranac.com/2009/07/sines/ * | |
* Fifth-order variant can be found here: https://www.nullhardware.com/ * | |
\*********************************************************************************/ | |
int32_t | |
fp_sin(int32_t x) | |
{ | |
// S(x) = x * ( (3 << p) - (x * x >> r) ) >> s | |
// n : Q-pos for quarter circle 13 | |
// A : Q-pos for output 12 | |
// p : Q-pos for parentheses intermediate 15 | |
// r = 2n-p 11 | |
// s = A-1-p-n 17 | |
static const int | |
qN = 16, | |
qA = 10, | |
qP = 15, | |
qR = 2 * qN - qP, | |
qS = qN + qP + 1 - qA; | |
x = x << (30 - qN); // shift to full s32 range (Q13->Q30) | |
if((x ^ (x << 1)) < 0) // test for quadrant 1 or 2 | |
x = (1 << 31) - x; | |
x = x >> (30 - qN); | |
// 0.5z(3-z^2); z = 2x/pi | |
return x * ((3 << qP) - ((int64_t)x * x >> qR)) >> qS; | |
} | |
inline int32_t fp_cos(int32_t x) { return fp_sin(x + Q10_BPI_2); } | |
inline int32_t fp_tan(int32_t x) { return (((int64_t)fp_sin(x) << 10) / fp_cos(x)); } | |
inline int32_t fp_sec(int32_t x) { return ((0x400 << 10) / fp_cos(x)); } | |
inline int32_t fp_csc(int32_t x) { return ((0x400 << 10) / fp_sin(x)); } | |
inline int32_t fp_cot(int32_t x) { return (((int64_t)fp_cos(x) << 10) / fp_sin(x)); } | |
/*********************************************************************************\ | |
* Fixed-Point ArcCos * | |
* Input: x in Q10 * | |
* Output: y in Q10 fp-radians [rad * (256 << 10) / (2pi)] * | |
* * | |
* std::acos(x) = fp_acos(x) / (float)(1 << 10) * | |
* * | |
* fp_asin can be derived from fp_acos * | |
* Based on: https://stackoverflow.com/questions/7378187/ * | |
* Originally from https://developer.nvidia.com/ * | |
\*********************************************************************************/ | |
int32_t | |
fp_acos(int32_t x) { | |
static const int8_t precision = 10; | |
int8_t sign = (x < 0) ? -1 : 1; | |
int64_t abs_x, y; | |
abs_x = abs(x); | |
abs_x %= (1 << precision) + 1; | |
// right-shift to preserve | |
// precision after multiply | |
y = ((-19 * abs_x) >> precision) + 76; | |
y = ((y * abs_x) >> precision) - 217; | |
y = ((y * abs_x) >> precision) + 1608; | |
y = (y * fp_sqrt(1024 - abs_x)) >> 10; | |
y = y + (2 * sign * y); | |
y = y + sign * 3217; | |
return y; | |
} | |
inline int32_t fp_asin(int32_t x) { return (fp_acos(-x) - Q10_BPI_2); } | |
/*********************************************************************************\ | |
* Fixed-Point ArcTan2 * | |
* Input: x in Q10 fp-radians [rad * (256 << 10) / (2pi)] * | |
* Output: y in Q10 * | |
* * | |
* std::atan2(y, x) = fp_atan2(fp_y, fp_x) / (float)(1 << 10) +/-0.00885 * | |
* * | |
* fp_asin, fp_acos, fp_asec, fp_acsc, fp_acot can all be derived from fp_atan2 * | |
* Based on: https://developer.download.nvidia.com/cg/atan2.html * | |
\*********************************************************************************/ | |
int32_t | |
fp_atan2(int32_t y, int32_t x) | |
{ | |
static const int8_t precision = 10; | |
int32_t t0, t1, t2, t3, t4; | |
t3 = abs(x); | |
t1 = abs(y); | |
t0 = (t3 > t1) ? t3 : t1; | |
t1 = (t3 < t1) ? t3 : t1; | |
t3 = (0x400 << precision) / t0; | |
t3 = (t3 * t1) >> precision; | |
t4 = (t3 * t3) >> precision; | |
t0 = - 14; // Q10(0.013480470) = 14 | |
t0 = ((t0 * t4)>>precision) + 59; // Q10(0.057477314) = 59 | |
t0 = ((t0 * t4)>>precision) - 124; // Q10(0.121239071) = 124 | |
t0 = ((t0 * t4)>>precision) + 200; // Q10(0.195635925) = 200 | |
t0 = ((t0 * t4)>>precision) - 341; // Q10(0.332994597) = 341 | |
t0 = ((t0 * t4)>>precision) + 1024; // Q10(0.999995630) = 1024 | |
t3 = ((t0 * t3)>>precision); | |
t3 = (abs(y) > abs(x)) ? 1608 - t3 : t3; // Q10(1.570796327) = 1608 | |
t3 = (x < 0) ? 3217 - t3 : t3; // Q10(3.141592654) = 3217 | |
t3 = (y < 0) ? -t3 : t3; | |
return t3; | |
} | |
// fp_asin(x) := fp_atan2(x, fp_sqrt(1 - x * x)) | |
// fp_acos(x) := fp_atan2(fp_sqrt(1 - x * x), x) | |
inline int32_t fp_atan(int32_t x) { return fp_atan2(x, 0x400); } // not tested - theoretical | |
inline int32_t fp_asec(int32_t x) { return fp_acos((0x400 << 10) / x); } | |
inline int32_t fp_acsc(int32_t x) { return fp_asin((0x400 << 10) / x); } | |
inline int32_t fp_acot(x) { return (fp_atan(x) - (x > 0 ? 0 : Q10_BPI)); } | |
#include <iostream> | |
#include <iomanip> | |
#define DCELL(c) std::dec << std::left << std::setw(c) | |
int main() { | |
std::cout << "-- BEGIN DEMO -- \n"; | |
int32_t bigree = 1 << 10; // binary-degree | |
int32_t full_revolution = bigree * 256; | |
for (int32_t a = 0; a < (full_revolution + bigree * 10); a += (2 * bigree)) { | |
float fx = std::cos(i * 2 * M_PI / (float)full_revolution); | |
float fy = std::sin(i * 2 * M_PI / (float)full_revolution); | |
int32_t ix = fp_cos(i); | |
int32_t iy = fp_sin(i); | |
// -- fp_sqrt example -- | |
std::cout << DCELL(12) << std::sqrt(a / (float)bigree); | |
std::cout << DCELL(12) << (fp_sqrt(a) / (float)bigree); | |
// -- fp_log2 example -- | |
std::cout << DCELL(12) << std::log2(i / (float)bigree); | |
std::cout << DCELL(12) << (fp_log2(i) / (float)bigree); | |
// -- fp_sin example -- | |
std::cout << DCELL(12) << std::sin(i * 2 * M_PI / (float)full_revolution); | |
std::cout << DCELL(12) << (fp_sin(i) / (float)bigree); | |
// -- fp_acos example -- | |
std::cout << DCELL(12) << std::acos(fx); | |
std::cout << DCELL(12) << (fp_acos(ix) / (float)(1 << 10)); | |
// -- fp_atan2 example -- | |
std::cout << DCELL(12) << std::atan2(fy, fx); | |
std::cout << DCELL(12) << (fp_atan2(iy, ix) / (float)bigree); | |
} | |
std::cout << "-- END OF DEMO --" << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment