Last active
December 18, 2015 05:29
Revisions
-
sh1boot revised this gist
Jun 11, 2013 . 1 changed file with 42 additions and 18 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -45,24 +45,14 @@ uint32_t divfn(uint16_t i, uint16_t j) * it's within bounds. In this case, we'll use the fractional part (k) for * linear interpolation, and the integer part for the index. */ t = (uint32_t)j << 8; while (t >= ((uint32_t)1 << (TABBITS + 8))) { t >>= 1; shift++; } j = t >> 8; k = (uint8_t)t; /* look up (0x80000000 / j) */ t = table2[j]; @@ -86,14 +76,48 @@ uint32_t divfn(uint16_t i, uint16_t j) t += m; } /* compute i * (0x100000000/j) >> 16 for a 16.16 fixed-point value i/j. * If we had to shift j right, earlier, then we'll shift the result right * as well, because larger should give us a smaller result. */ { uint16_t tmp; uint8_t b[4]; uint16_t hi; #if 0 /* correct, but ineffective */ tmp = ((uint16_t)((uint8_t)i) * (uint8_t)t) >> 8; #else tmp = 0x80; #endif tmp += (uint16_t)((uint8_t)i) * (uint8_t)(t >> 8); hi = (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)t; if (shift < 1) hi += 0x80; tmp += (uint8_t)hi; tmp >>= 8; tmp += hi >> 8; tmp += ((uint8_t)i) * (uint8_t)(t >> 16); b[0] = (uint8_t)tmp; tmp >>= 8; tmp += ((uint8_t)i) * (uint8_t)(t >> 24); b[1] = (uint8_t)tmp; b[2] = (uint8_t)(tmp >> 8); tmp = b[0] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 8); b[0] = (uint8_t)tmp; tmp >>= 8; tmp += b[1] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 16); b[1] = (uint8_t)tmp; tmp >>= 8; tmp += b[2] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 24); b[2] = (uint8_t)tmp; b[3] = (uint8_t)(tmp >> 8); t = b[0] | (uint32_t)b[1] << 8 | (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24; } if (shift == 0) return t; while (shift-- > 1) t >>= 1; @@ -117,11 +141,11 @@ int main(void) printf("\n}\n"); #endif for (j = 1; j < 65536; j += 3) for (i = 0; i < 65536; i += 3) { uint32_t ref = (uint32_t)(((uint32_t)i << 16) + (j / 2)) / j; #if 0 uint32_t tabent = (0x80000000 + (j / 2)) / j; uint32_t test = ((uint64_t)i * tabent + 0x4000) >> 15; #else -
sh1boot revised this gist
Jun 10, 2013 . 1 changed file with 16 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -30,6 +30,22 @@ static const uint32_t table2[256 + 1] __attribute__((progmem)) = { 0x01111111, 0x010fef01, 0x010ecf57, 0x010db20b, 0x010c9715, 0x010b7e6f, 0x010a6811, 0x010953f4, 0x01084211, 0x01073261, 0x010624dd, 0x0105197f, 0x01041041, 0x0103091b, 0x01020408, 0x01010101, 0x01000000, }; #endif/*}}}*/ uint32_t divfn(uint16_t i, uint16_t j) { uint32_t t; uint8_t shift = 0; uint8_t k; if (j <= 1) return (int32_t)i << 16; /* if j is too large to index the table, then we just shift it right until * it's within bounds. In this case, we'll use the fractional part (k) for * linear interpolation, and the integer part for the index. */ #if 1 t = (uint32_t)j << 8; while (t >= ((uint32_t)256 << TABBITS)) { -
sh1boot revised this gist
Jun 10, 2013 . 1 changed file with 75 additions and 53 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,37 +1,52 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> #define TABBITS 8 #if 0 extern uint32_t divfn(uint16_t i, uint16_t j); #else #if TABBITS != 8 uint32_t table2[(1 << TABBITS) + 1]; #else/*{{{*/ static const uint32_t table2[256 + 1] __attribute__((progmem)) = { 0xffffffff, 0x00000000, 0x80000000, 0x55555555, 0x40000000, 0x33333333, 0x2aaaaaab, 0x24924925, 0x20000000, 0x1c71c71c, 0x1999999a, 0x1745d174, 0x15555555, 0x13b13b14, 0x12492492, 0x11111111, 0x10000000, 0x0f0f0f0f, 0x0e38e38e, 0x0d79435e, 0x0ccccccd, 0x0c30c30c, 0x0ba2e8ba, 0x0b21642d, 0x0aaaaaab, 0x0a3d70a4, 0x09d89d8a, 0x097b425f, 0x09249249, 0x08d3dcb1, 0x08888889, 0x08421084, 0x08000000, 0x07c1f07c, 0x07878788, 0x07507507, 0x071c71c7, 0x06eb3e45, 0x06bca1af, 0x06906907, 0x06666666, 0x063e7064, 0x06186186, 0x05f417d0, 0x05d1745d, 0x05b05b06, 0x0590b216, 0x0572620b, 0x05555555, 0x0539782a, 0x051eb852, 0x05050505, 0x04ec4ec5, 0x04d4873f, 0x04bda12f, 0x04a7904a, 0x04924925, 0x047dc11f, 0x0469ee58, 0x0456c798, 0x04444444, 0x04325c54, 0x04210842, 0x04104104, 0x04000000, 0x03f03f04, 0x03e0f83e, 0x03d22635, 0x03c3c3c4, 0x03b5cc0f, 0x03a83a84, 0x039b0ad1, 0x038e38e4, 0x0381c0e0, 0x03759f23, 0x0369d037, 0x035e50d8, 0x03531dec, 0x03483483, 0x033d91d3, 0x03333333, 0x03291620, 0x031f3832, 0x03159722, 0x030c30c3, 0x03030303, 0x02fa0be8, 0x02f14990, 0x02e8ba2f, 0x02e05c0c, 0x02d82d83, 0x02d02d03, 0x02c8590b, 0x02c0b02c, 0x02b93105, 0x02b1da46, 0x02aaaaab, 0x02a3a0fd, 0x029cbc15, 0x0295fad4, 0x028f5c29, 0x0288df0d, 0x02828283, 0x027c4598, 0x02762762, 0x02702702, 0x026a439f, 0x02647c69, 0x025ed098, 0x02593f6a, 0x0253c825, 0x024e6a17, 0x02492492, 0x0243f6f0, 0x023ee090, 0x0239e0d6, 0x0234f72c, 0x02302302, 0x022b63cc, 0x0226b902, 0x02222222, 0x021d9ead, 0x02192e2a, 0x0214d021, 0x02108421, 0x020c49ba, 0x02082082, 0x02040810, 0x02000000, 0x01fc07f0, 0x01f81f82, 0x01f4465a, 0x01f07c1f, 0x01ecc07b, 0x01e9131b, 0x01e573ad, 0x01e1e1e2, 0x01de5d6e, 0x01dae607, 0x01d77b65, 0x01d41d42, 0x01d0cb59, 0x01cd8569, 0x01ca4b30, 0x01c71c72, 0x01c3f8f0, 0x01c0e070, 0x01bdd2b9, 0x01bacf91, 0x01b7d6c4, 0x01b4e81b, 0x01b20364, 0x01af286c, 0x01ac5702, 0x01a98ef6, 0x01a6d01a, 0x01a41a42, 0x01a16d40, 0x019ec8e9, 0x019c2d15, 0x0199999a, 0x01970e50, 0x01948b10, 0x01920fb5, 0x018f9c19, 0x018d3019, 0x018acb91, 0x01886e5f, 0x01861862, 0x0183c978, 0x01818182, 0x017f4060, 0x017d05f4, 0x017ad221, 0x0178a4c8, 0x01767dce, 0x01745d17, 0x01724288, 0x01702e06, 0x016e1f77, 0x016c16c1, 0x016a13cd, 0x01681681, 0x01661ec7, 0x01642c86, 0x01623fa7, 0x01605816, 0x015e75bc, 0x015c9883, 0x015ac057, 0x0158ed23, 0x01571ed4, 0x01555555, 0x01539095, 0x0151d07f, 0x01501501, 0x014e5e0a, 0x014cab88, 0x014afd6a, 0x0149539e, 0x0147ae14, 0x01460cbc, 0x01446f86, 0x0142d662, 0x01414141, 0x013fb014, 0x013e22cc, 0x013c995a, 0x013b13b1, 0x013991c3, 0x01381381, 0x013698df, 0x013521d0, 0x0133ae46, 0x01323e35, 0x0130d190, 0x012f684c, 0x012e025c, 0x012c9fb5, 0x012b404b, 0x0129e413, 0x01288b01, 0x0127350c, 0x0125e227, 0x01249249, 0x01234568, 0x0121fb78, 0x0120b471, 0x011f7048, 0x011e2ef4, 0x011cf06b, 0x011bb4a4, 0x011a7b96, 0x01194538, 0x01181181, 0x0116e069, 0x0115b1e6, 0x011485f1, 0x01135c81, 0x0112358e, 0x01111111, 0x010fef01, 0x010ecf57, 0x010db20b, 0x010c9715, 0x010b7e6f, 0x010a6811, 0x010953f4, 0x01084211, 0x01073261, 0x010624dd, 0x0105197f, 0x01041041, 0x0103091b, 0x01020408, 0x01010101, 0x01000000, }; t = (uint32_t)j << 8; while (t >= ((uint32_t)256 << TABBITS)) { t >>= 1; shift++; } j = t >> 8; k = (uint8_t)t; #else k = 0; while (j >= (1 << TABBITS)) { k = ((j << 7) & 0x80) | (k >> 1); j >>= 1; shift++; } #endif /* look up (0x80000000 / j) */ t = table2[j]; @@ -42,57 +57,64 @@ uint32_t divfn(uint32_t i, uint32_t j, int v) * shifted down and interpolated represent an especially linear part of * the table. */ uint16_t tmp; uint32_t n = table2[j + 1] - t; uint32_t m; tmp = (uint16_t)((uint8_t)(n >> 0)) * k; tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 8)) * k; m = (uint8_t)tmp; tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 16)) * k; m |= (uint32_t)((uint8_t)tmp) << 8; tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 24)) * k; m |= (uint32_t)((uint8_t)tmp) << 16; t += m; } /* compute i * (0x80000000/j) >> 15 for a 16.16 fixed-point value for i/j. * If we had to shift j right, earlier, then we'll shift the result right * as well, because larger should give us a smaller result. */ if (shift == 0) return ((uint64_t)i * t + 0x80000) >> 16; t = (uint64_t)i * t >> 16; while (shift-- > 1) t >>= 1; t++; return t >> 1; } #endif int main(void) { unsigned long i, j; uint64_t err = 0; #if TABBITS != 8 printf("static const uint32_t table2[%d + 1] __attribute__((progmem)) = {\n0xffffffff, ", 1 << TABBITS); for (j = 1; j < (1 << TABBITS) + 1; j++) { table2[j] = (0x100000000u + (j / 2)) / j; printf("0x%08x,%c", table2[j], ((j & 15) == 15) ? '\n' : ' '); } printf("\n}\n"); #endif for (j = 1; j < 65536; j += 7) for (i = 0; i < 65536; i += 101) { uint32_t ref = (uint32_t)(((uint32_t)i << 16) + (j / 2)) / j; #if 1 uint32_t tabent = (0x80000000 + (j / 2)) / j; uint32_t test = ((uint64_t)i * tabent + 0x4000) >> 15; #else uint32_t test = divfn(i, j); #endif int32_t e = (int32_t)(ref - test); if (e > 1) printf("%u/%u==%lu (0x%lx) (not %lu (0x%lx)) error: %ld\n", (unsigned)i, (unsigned)j, ref, ref, test, test, e); err += abs(e); } return 0; } -
sh1boot revised this gist
Jun 10, 2013 . 1 changed file with 33 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,4 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> @@ -15,21 +16,49 @@ uint32_t divfn(uint32_t i, uint32_t j, int v) uint8_t shift = 0; uint16_t k; /* convert j to a 16.8 fixed-point number */ j <<= 8; /* if j is too large to index the table, then we just shift it right until * it's within bounds. */ while (j >= (256 << TABBITS)) { j >>= 1; shift++; } /* If j has been shifted right, we'll use the fractional part for linear * interpolation, and the integer part for the index. */ k = j & 255; j >>= 8; /* look up (0x80000000 / j) */ t = table2[j]; if (k) { /* Linearly interpolate between adjacent table entries. Note that this * works well because the set of values which are large and need to be * shifted down and interpolated represent an especially linear part of * the table. */ t += (uint64_t)(table2[j + 1] - t) * k >> 8; } /* rounding factor to properly balance the right shift. In assembly this can * be replaced with an addition of the resulting carry bit after the last * right shift. */ r = 0x4000 << shift; /* debug */ if (v) printf("r=%04x, j=%04x, t=%08x t>>s=%08x, (T=%08x), shift=%d\n", r, j, t, t >> shift, table[v], shift); /* compute i * (0x80000000/j) >> 15 for a 16.16 fixed-point value for i/j. * If we had to shift j right, earlier, then we'll shift the result right * as well, because larger should give us a smaller result. */ return ((uint64_t)i * t + r) >> (15 + shift); } -
sh1boot created this gist
Jun 7, 2013 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,69 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> uint32_t table[65536]; #define TABBITS 8 uint32_t table2[(1 << TABBITS) + 1]; uint32_t divfn(uint32_t i, uint32_t j, int v) { uint32_t t; uint32_t r; uint8_t shift = 0; uint16_t k; k = 0; while (j >= (1 << TABBITS)) { k |= (j & 1) << shift; j >>= 1; shift++; } t = table2[j]; t += (uint64_t)(table2[j + 1] - t) * k >> shift; r = 0x4000 << shift; if (v) printf("r=%04x, j=%04x, t=%08x t>>s=%08x, (T=%08x), shift=%d\n", r, j, t, t >> shift, table[v], shift); return ((uint64_t)i * t + r) >> (15 + shift); } int main(void) { int i, j; uint64_t err = 0; for (j = 1; j < (1 << TABBITS) + 3; j++) { table2[j] = (0x80000000u + (j / 2)) / j; } for (j = 1; j < 65536; j += 15) { table[j] = (0x80000000u + (j / 2)) / j; for (i = 0; i < 65536; i += 1) { uint32_t ref = (uint32_t)((i << 16) + (j / 2)) / j; #if 0 uint32_t test = ((uint64_t)i * table[j] + 0x4000) >> 15; #else uint32_t test = divfn(i, j, 0); #endif int32_t e = (int32_t)(ref - test); if (e > 1) { (void)divfn(i, j, j); printf("%d/%d==%u (0x%x) (not %u (0x%x)) error: %d\n", i, j, ref, ref, test, test, e); } err += abs(e); } } return 0; }