Skip to content

Instantly share code, notes, and snippets.

@sh1boot
Last active December 18, 2015 05:29

Revisions

  1. sh1boot revised this gist Jun 11, 2013. 1 changed file with 42 additions and 18 deletions.
    60 changes: 42 additions & 18 deletions gistfile1.c
    Original file line number Diff line number Diff line change
    @@ -45,24 +45,14 @@ uint32_t divfn(uint16_t i, uint16_t j)
    * it's within bounds. In this case, we'll use the fractional part (k) for
    * linear interpolation, and the integer part for the index.
    */
    #if 1
    t = (uint32_t)j << 8;
    while (t >= ((uint32_t)256 << TABBITS))
    while (t >= ((uint32_t)1 << (TABBITS + 8)))
    {
    t >>= 1;
    shift++;
    }
    j = t >> 8;
    k = (uint8_t)t;
    #else
    k = 0;
    while (j >= (1 << TABBITS))
    {
    k = ((j << 7) & 0x80) | (k >> 1);
    j >>= 1;
    shift++;
    }
    #endif

    /* look up (0x80000000 / j) */
    t = table2[j];
    @@ -86,14 +76,48 @@ uint32_t divfn(uint16_t i, uint16_t j)
    t += m;
    }

    /* compute i * (0x80000000/j) >> 15 for a 16.16 fixed-point value for i/j.
    /* compute i * (0x100000000/j) >> 16 for a 16.16 fixed-point value i/j.
    * If we had to shift j right, earlier, then we'll shift the result right
    * as well, because larger should give us a smaller result.
    */
    if (shift == 0)
    return ((uint64_t)i * t + 0x80000) >> 16;
    {
    uint16_t tmp;
    uint8_t b[4];
    uint16_t hi;
    #if 0 /* correct, but ineffective */
    tmp = ((uint16_t)((uint8_t)i) * (uint8_t)t) >> 8;
    #else
    tmp = 0x80;
    #endif
    tmp += (uint16_t)((uint8_t)i) * (uint8_t)(t >> 8);
    hi = (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)t;
    if (shift < 1)
    hi += 0x80;
    tmp += (uint8_t)hi;
    tmp >>= 8;
    tmp += hi >> 8;

    t = (uint64_t)i * t >> 16;
    tmp += ((uint8_t)i) * (uint8_t)(t >> 16);
    b[0] = (uint8_t)tmp;
    tmp >>= 8;
    tmp += ((uint8_t)i) * (uint8_t)(t >> 24);
    b[1] = (uint8_t)tmp;
    b[2] = (uint8_t)(tmp >> 8);

    tmp = b[0] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 8);
    b[0] = (uint8_t)tmp;
    tmp >>= 8;
    tmp += b[1] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 16);
    b[1] = (uint8_t)tmp;
    tmp >>= 8;
    tmp += b[2] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 24);
    b[2] = (uint8_t)tmp;
    b[3] = (uint8_t)(tmp >> 8);

    t = b[0] | (uint32_t)b[1] << 8 | (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
    }
    if (shift == 0)
    return t;

    while (shift-- > 1)
    t >>= 1;
    @@ -117,11 +141,11 @@ int main(void)
    printf("\n}\n");
    #endif

    for (j = 1; j < 65536; j += 7)
    for (i = 0; i < 65536; i += 101)
    for (j = 1; j < 65536; j += 3)
    for (i = 0; i < 65536; i += 3)
    {
    uint32_t ref = (uint32_t)(((uint32_t)i << 16) + (j / 2)) / j;
    #if 1
    #if 0
    uint32_t tabent = (0x80000000 + (j / 2)) / j;
    uint32_t test = ((uint64_t)i * tabent + 0x4000) >> 15;
    #else
  2. sh1boot revised this gist Jun 10, 2013. 1 changed file with 16 additions and 0 deletions.
    16 changes: 16 additions & 0 deletions gistfile1.c
    Original file line number Diff line number Diff line change
    @@ -30,6 +30,22 @@ static const uint32_t table2[256 + 1] __attribute__((progmem)) = {
    0x01111111, 0x010fef01, 0x010ecf57, 0x010db20b, 0x010c9715, 0x010b7e6f, 0x010a6811, 0x010953f4, 0x01084211, 0x01073261, 0x010624dd, 0x0105197f, 0x01041041, 0x0103091b, 0x01020408, 0x01010101,
    0x01000000,
    };
    #endif/*}}}*/

    uint32_t divfn(uint16_t i, uint16_t j)
    {
    uint32_t t;
    uint8_t shift = 0;
    uint8_t k;

    if (j <= 1)
    return (int32_t)i << 16;

    /* if j is too large to index the table, then we just shift it right until
    * it's within bounds. In this case, we'll use the fractional part (k) for
    * linear interpolation, and the integer part for the index.
    */
    #if 1
    t = (uint32_t)j << 8;
    while (t >= ((uint32_t)256 << TABBITS))
    {
  3. sh1boot revised this gist Jun 10, 2013. 1 changed file with 75 additions and 53 deletions.
    128 changes: 75 additions & 53 deletions gistfile1.c
    Original file line number Diff line number Diff line change
    @@ -1,37 +1,52 @@

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    uint32_t table[65536];

    #define TABBITS 8

    uint32_t table2[(1 << TABBITS) + 1];

    uint32_t divfn(uint32_t i, uint32_t j, int v)
    {
    uint32_t t;
    uint32_t r;
    uint8_t shift = 0;
    uint16_t k;

    /* convert j to a 16.8 fixed-point number */
    j <<= 8;
    #if 0
    extern uint32_t divfn(uint16_t i, uint16_t j);
    #else

    /* if j is too large to index the table, then we just shift it right until
    * it's within bounds.
    */
    while (j >= (256 << TABBITS))
    #if TABBITS != 8
    uint32_t table2[(1 << TABBITS) + 1];
    #else/*{{{*/
    static const uint32_t table2[256 + 1] __attribute__((progmem)) = {
    0xffffffff, 0x00000000, 0x80000000, 0x55555555, 0x40000000, 0x33333333, 0x2aaaaaab, 0x24924925, 0x20000000, 0x1c71c71c, 0x1999999a, 0x1745d174, 0x15555555, 0x13b13b14, 0x12492492, 0x11111111,
    0x10000000, 0x0f0f0f0f, 0x0e38e38e, 0x0d79435e, 0x0ccccccd, 0x0c30c30c, 0x0ba2e8ba, 0x0b21642d, 0x0aaaaaab, 0x0a3d70a4, 0x09d89d8a, 0x097b425f, 0x09249249, 0x08d3dcb1, 0x08888889, 0x08421084,
    0x08000000, 0x07c1f07c, 0x07878788, 0x07507507, 0x071c71c7, 0x06eb3e45, 0x06bca1af, 0x06906907, 0x06666666, 0x063e7064, 0x06186186, 0x05f417d0, 0x05d1745d, 0x05b05b06, 0x0590b216, 0x0572620b,
    0x05555555, 0x0539782a, 0x051eb852, 0x05050505, 0x04ec4ec5, 0x04d4873f, 0x04bda12f, 0x04a7904a, 0x04924925, 0x047dc11f, 0x0469ee58, 0x0456c798, 0x04444444, 0x04325c54, 0x04210842, 0x04104104,
    0x04000000, 0x03f03f04, 0x03e0f83e, 0x03d22635, 0x03c3c3c4, 0x03b5cc0f, 0x03a83a84, 0x039b0ad1, 0x038e38e4, 0x0381c0e0, 0x03759f23, 0x0369d037, 0x035e50d8, 0x03531dec, 0x03483483, 0x033d91d3,
    0x03333333, 0x03291620, 0x031f3832, 0x03159722, 0x030c30c3, 0x03030303, 0x02fa0be8, 0x02f14990, 0x02e8ba2f, 0x02e05c0c, 0x02d82d83, 0x02d02d03, 0x02c8590b, 0x02c0b02c, 0x02b93105, 0x02b1da46,
    0x02aaaaab, 0x02a3a0fd, 0x029cbc15, 0x0295fad4, 0x028f5c29, 0x0288df0d, 0x02828283, 0x027c4598, 0x02762762, 0x02702702, 0x026a439f, 0x02647c69, 0x025ed098, 0x02593f6a, 0x0253c825, 0x024e6a17,
    0x02492492, 0x0243f6f0, 0x023ee090, 0x0239e0d6, 0x0234f72c, 0x02302302, 0x022b63cc, 0x0226b902, 0x02222222, 0x021d9ead, 0x02192e2a, 0x0214d021, 0x02108421, 0x020c49ba, 0x02082082, 0x02040810,
    0x02000000, 0x01fc07f0, 0x01f81f82, 0x01f4465a, 0x01f07c1f, 0x01ecc07b, 0x01e9131b, 0x01e573ad, 0x01e1e1e2, 0x01de5d6e, 0x01dae607, 0x01d77b65, 0x01d41d42, 0x01d0cb59, 0x01cd8569, 0x01ca4b30,
    0x01c71c72, 0x01c3f8f0, 0x01c0e070, 0x01bdd2b9, 0x01bacf91, 0x01b7d6c4, 0x01b4e81b, 0x01b20364, 0x01af286c, 0x01ac5702, 0x01a98ef6, 0x01a6d01a, 0x01a41a42, 0x01a16d40, 0x019ec8e9, 0x019c2d15,
    0x0199999a, 0x01970e50, 0x01948b10, 0x01920fb5, 0x018f9c19, 0x018d3019, 0x018acb91, 0x01886e5f, 0x01861862, 0x0183c978, 0x01818182, 0x017f4060, 0x017d05f4, 0x017ad221, 0x0178a4c8, 0x01767dce,
    0x01745d17, 0x01724288, 0x01702e06, 0x016e1f77, 0x016c16c1, 0x016a13cd, 0x01681681, 0x01661ec7, 0x01642c86, 0x01623fa7, 0x01605816, 0x015e75bc, 0x015c9883, 0x015ac057, 0x0158ed23, 0x01571ed4,
    0x01555555, 0x01539095, 0x0151d07f, 0x01501501, 0x014e5e0a, 0x014cab88, 0x014afd6a, 0x0149539e, 0x0147ae14, 0x01460cbc, 0x01446f86, 0x0142d662, 0x01414141, 0x013fb014, 0x013e22cc, 0x013c995a,
    0x013b13b1, 0x013991c3, 0x01381381, 0x013698df, 0x013521d0, 0x0133ae46, 0x01323e35, 0x0130d190, 0x012f684c, 0x012e025c, 0x012c9fb5, 0x012b404b, 0x0129e413, 0x01288b01, 0x0127350c, 0x0125e227,
    0x01249249, 0x01234568, 0x0121fb78, 0x0120b471, 0x011f7048, 0x011e2ef4, 0x011cf06b, 0x011bb4a4, 0x011a7b96, 0x01194538, 0x01181181, 0x0116e069, 0x0115b1e6, 0x011485f1, 0x01135c81, 0x0112358e,
    0x01111111, 0x010fef01, 0x010ecf57, 0x010db20b, 0x010c9715, 0x010b7e6f, 0x010a6811, 0x010953f4, 0x01084211, 0x01073261, 0x010624dd, 0x0105197f, 0x01041041, 0x0103091b, 0x01020408, 0x01010101,
    0x01000000,
    };
    t = (uint32_t)j << 8;
    while (t >= ((uint32_t)256 << TABBITS))
    {
    t >>= 1;
    shift++;
    }
    j = t >> 8;
    k = (uint8_t)t;
    #else
    k = 0;
    while (j >= (1 << TABBITS))
    {
    k = ((j << 7) & 0x80) | (k >> 1);
    j >>= 1;
    shift++;
    }
    /* If j has been shifted right, we'll use the fractional part for linear
    * interpolation, and the integer part for the index.
    */
    k = j & 255;
    j >>= 8;
    #endif

    /* look up (0x80000000 / j) */
    t = table2[j];
    @@ -42,57 +57,64 @@ uint32_t divfn(uint32_t i, uint32_t j, int v)
    * shifted down and interpolated represent an especially linear part of
    * the table.
    */
    t += (uint64_t)(table2[j + 1] - t) * k >> 8;
    uint16_t tmp;
    uint32_t n = table2[j + 1] - t;
    uint32_t m;
    tmp = (uint16_t)((uint8_t)(n >> 0)) * k;
    tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 8)) * k;
    m = (uint8_t)tmp;
    tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 16)) * k;
    m |= (uint32_t)((uint8_t)tmp) << 8;
    tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 24)) * k;
    m |= (uint32_t)((uint8_t)tmp) << 16;
    t += m;
    }

    /* rounding factor to properly balance the right shift. In assembly this can
    * be replaced with an addition of the resulting carry bit after the last
    * right shift.
    */
    r = 0x4000 << shift;

    /* debug */
    if (v)
    printf("r=%04x, j=%04x, t=%08x t>>s=%08x, (T=%08x), shift=%d\n", r, j, t, t >> shift, table[v], shift);

    /* compute i * (0x80000000/j) >> 15 for a 16.16 fixed-point value for i/j.
    * If we had to shift j right, earlier, then we'll shift the result right
    * as well, because larger should give us a smaller result.
    */
    return ((uint64_t)i * t + r) >> (15 + shift);
    if (shift == 0)
    return ((uint64_t)i * t + 0x80000) >> 16;

    t = (uint64_t)i * t >> 16;

    while (shift-- > 1)
    t >>= 1;
    t++;
    return t >> 1;
    }
    #endif

    int main(void)
    {
    int i, j;
    unsigned long i, j;
    uint64_t err = 0;

    for (j = 1; j < (1 << TABBITS) + 3; j++)
    #if TABBITS != 8
    printf("static const uint32_t table2[%d + 1] __attribute__((progmem)) = {\n0xffffffff, ", 1 << TABBITS);
    for (j = 1; j < (1 << TABBITS) + 1; j++)
    {
    table2[j] = (0x80000000u + (j / 2)) / j;
    table2[j] = (0x100000000u + (j / 2)) / j;
    printf("0x%08x,%c", table2[j], ((j & 15) == 15) ? '\n' : ' ');
    }
    printf("\n}\n");
    #endif

    for (j = 1; j < 65536; j += 15)
    {
    table[j] = (0x80000000u + (j / 2)) / j;

    for (i = 0; i < 65536; i += 1)
    for (j = 1; j < 65536; j += 7)
    for (i = 0; i < 65536; i += 101)
    {
    uint32_t ref = (uint32_t)((i << 16) + (j / 2)) / j;
    #if 0
    uint32_t test = ((uint64_t)i * table[j] + 0x4000) >> 15;
    uint32_t ref = (uint32_t)(((uint32_t)i << 16) + (j / 2)) / j;
    #if 1
    uint32_t tabent = (0x80000000 + (j / 2)) / j;
    uint32_t test = ((uint64_t)i * tabent + 0x4000) >> 15;
    #else
    uint32_t test = divfn(i, j, 0);
    uint32_t test = divfn(i, j);
    #endif
    int32_t e = (int32_t)(ref - test);
    if (e > 1)
    {
    (void)divfn(i, j, j);
    printf("%d/%d==%u (0x%x) (not %u (0x%x)) error: %d\n", i, j, ref, ref, test, test, e);
    }
    printf("%u/%u==%lu (0x%lx) (not %lu (0x%lx)) error: %ld\n", (unsigned)i, (unsigned)j, ref, ref, test, test, e);
    err += abs(e);

    }
    }
    return 0;
    }
  4. sh1boot revised this gist Jun 10, 2013. 1 changed file with 33 additions and 4 deletions.
    37 changes: 33 additions & 4 deletions gistfile1.c
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    @@ -15,21 +16,49 @@ uint32_t divfn(uint32_t i, uint32_t j, int v)
    uint8_t shift = 0;
    uint16_t k;

    k = 0;
    while (j >= (1 << TABBITS))
    /* convert j to a 16.8 fixed-point number */
    j <<= 8;

    /* if j is too large to index the table, then we just shift it right until
    * it's within bounds.
    */
    while (j >= (256 << TABBITS))
    {
    k |= (j & 1) << shift;
    j >>= 1;
    shift++;
    }
    /* If j has been shifted right, we'll use the fractional part for linear
    * interpolation, and the integer part for the index.
    */
    k = j & 255;
    j >>= 8;

    /* look up (0x80000000 / j) */
    t = table2[j];
    t += (uint64_t)(table2[j + 1] - t) * k >> shift;
    if (k)
    {
    /* Linearly interpolate between adjacent table entries. Note that this
    * works well because the set of values which are large and need to be
    * shifted down and interpolated represent an especially linear part of
    * the table.
    */
    t += (uint64_t)(table2[j + 1] - t) * k >> 8;
    }

    /* rounding factor to properly balance the right shift. In assembly this can
    * be replaced with an addition of the resulting carry bit after the last
    * right shift.
    */
    r = 0x4000 << shift;

    /* debug */
    if (v)
    printf("r=%04x, j=%04x, t=%08x t>>s=%08x, (T=%08x), shift=%d\n", r, j, t, t >> shift, table[v], shift);

    /* compute i * (0x80000000/j) >> 15 for a 16.16 fixed-point value for i/j.
    * If we had to shift j right, earlier, then we'll shift the result right
    * as well, because larger should give us a smaller result.
    */
    return ((uint64_t)i * t + r) >> (15 + shift);
    }

  5. sh1boot created this gist Jun 7, 2013.
    69 changes: 69 additions & 0 deletions gistfile1.c
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,69 @@
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    uint32_t table[65536];

    #define TABBITS 8

    uint32_t table2[(1 << TABBITS) + 1];

    uint32_t divfn(uint32_t i, uint32_t j, int v)
    {
    uint32_t t;
    uint32_t r;
    uint8_t shift = 0;
    uint16_t k;

    k = 0;
    while (j >= (1 << TABBITS))
    {
    k |= (j & 1) << shift;
    j >>= 1;
    shift++;
    }

    t = table2[j];
    t += (uint64_t)(table2[j + 1] - t) * k >> shift;
    r = 0x4000 << shift;

    if (v)
    printf("r=%04x, j=%04x, t=%08x t>>s=%08x, (T=%08x), shift=%d\n", r, j, t, t >> shift, table[v], shift);

    return ((uint64_t)i * t + r) >> (15 + shift);
    }

    int main(void)
    {
    int i, j;
    uint64_t err = 0;

    for (j = 1; j < (1 << TABBITS) + 3; j++)
    {
    table2[j] = (0x80000000u + (j / 2)) / j;
    }

    for (j = 1; j < 65536; j += 15)
    {
    table[j] = (0x80000000u + (j / 2)) / j;

    for (i = 0; i < 65536; i += 1)
    {
    uint32_t ref = (uint32_t)((i << 16) + (j / 2)) / j;
    #if 0
    uint32_t test = ((uint64_t)i * table[j] + 0x4000) >> 15;
    #else
    uint32_t test = divfn(i, j, 0);
    #endif
    int32_t e = (int32_t)(ref - test);
    if (e > 1)
    {
    (void)divfn(i, j, j);
    printf("%d/%d==%u (0x%x) (not %u (0x%x)) error: %d\n", i, j, ref, ref, test, test, e);
    }
    err += abs(e);

    }
    }
    return 0;
    }