sh1boot · December 18, 2015 05:29
diff --git a/gistfile1.c b/gistfile1.c

 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>

 uint32_t table[65536];

 #define TABBITS 8

 uint32_t table2[(1 << TABBITS) + 1];

 uint32_t divfn(uint32_t i, uint32_t j, int v)
 {
    uint32_t t;
    uint32_t r;
    uint8_t shift = 0;
    uint16_t k;

    /* convert j to a 16.8 fixed-point number */
    j <<= 8;

    /* if j is too large to index the table, then we just shift it right until
     * it's within bounds.
     */
    while (j >= (256 << TABBITS))
    {
        j >>= 1;
        shift++;
    }
    /* If j has been shifted right, we'll use the fractional part for linear
     * interpolation, and the integer part for the index.
     */
    k = j & 255;
    j >>= 8;

    /* look up (0x80000000 / j) */
    t = table2[j];
    if (k)
    {
        /* Linearly interpolate between adjacent table entries.  Note that this
         * works well because the set of values which are large and need to be
         * shifted down and interpolated represent an especially linear part of
         * the table.
         */
        t += (uint64_t)(table2[j + 1] - t) * k >> 8;
    }

    /* rounding factor to properly balance the right shift.  In assembly this can
     * be replaced with an addition of the resulting carry bit after the last
     * right shift.
     */
    r = 0x4000 << shift;

    /* debug */
    if (v)
        printf("r=%04x, j=%04x, t=%08x t>>s=%08x, (T=%08x), shift=%d\n", r, j, t, t >> shift, table[v], shift);

    /* compute i * (0x80000000/j) >> 15 for a 16.16 fixed-point value for i/j.
     * If we had to shift j right, earlier, then we'll shift the result right
     * as well, because larger should give us a smaller result.
     */
    return ((uint64_t)i * t + r) >> (15 + shift);
 }

 int main(void)
 {
    int i, j;
    uint64_t err = 0;

    for (j = 1; j < (1 << TABBITS) + 3; j++)
    {
        table2[j] = (0x80000000u + (j / 2)) / j;
    }

    for (j = 1; j < 65536; j += 15)
    {
        table[j] = (0x80000000u + (j / 2)) / j;

        for (i = 0; i < 65536; i += 1)
        {
            uint32_t ref = (uint32_t)((i << 16) + (j / 2)) / j;
 #if 0
            uint32_t test = ((uint64_t)i * table[j] + 0x4000) >> 15;
 #else
            uint32_t test = divfn(i, j, 0);
 #endif
            int32_t e = (int32_t)(ref - test);
            if (e > 1)
            {
                (void)divfn(i, j, j);
                printf("%d/%d==%u (0x%x) (not %u (0x%x)) error: %d\n", i, j, ref, ref, test, test, e);
            }
            err += abs(e);

        }
    }
    return 0;
 }

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>

	uint32_t table[65536];

	#define TABBITS 8

	uint32_t table2[(1 << TABBITS) + 1];

	uint32_t divfn(uint32_t i, uint32_t j, int v)
	{
	uint32_t t;
	uint32_t r;
	uint8_t shift = 0;
	uint16_t k;

	/* convert j to a 16.8 fixed-point number */
	j <<= 8;

	/* if j is too large to index the table, then we just shift it right until
	* it's within bounds.
	*/
	while (j >= (256 << TABBITS))
	{
	j >>= 1;
	shift++;
	}
	/* If j has been shifted right, we'll use the fractional part for linear
	* interpolation, and the integer part for the index.
	*/
	k = j & 255;
	j >>= 8;

	/* look up (0x80000000 / j) */
	t = table2[j];
	if (k)
	{
	/* Linearly interpolate between adjacent table entries. Note that this
	* works well because the set of values which are large and need to be
	* shifted down and interpolated represent an especially linear part of
	* the table.
	*/
	t += (uint64_t)(table2[j + 1] - t) * k >> 8;
	}

	/* rounding factor to properly balance the right shift. In assembly this can
	* be replaced with an addition of the resulting carry bit after the last
	* right shift.
	*/
	r = 0x4000 << shift;

	/* debug */
	if (v)
	printf("r=%04x, j=%04x, t=%08x t>>s=%08x, (T=%08x), shift=%d\n", r, j, t, t >> shift, table[v], shift);

	/* compute i * (0x80000000/j) >> 15 for a 16.16 fixed-point value for i/j.
	* If we had to shift j right, earlier, then we'll shift the result right
	* as well, because larger should give us a smaller result.
	*/
	return ((uint64_t)i * t + r) >> (15 + shift);
	}

	int main(void)
	{
	int i, j;
	uint64_t err = 0;

	for (j = 1; j < (1 << TABBITS) + 3; j++)
	{
	table2[j] = (0x80000000u + (j / 2)) / j;
	}

	for (j = 1; j < 65536; j += 15)
	{
	table[j] = (0x80000000u + (j / 2)) / j;

	for (i = 0; i < 65536; i += 1)
	{
	uint32_t ref = (uint32_t)((i << 16) + (j / 2)) / j;
	#if 0
	uint32_t test = ((uint64_t)i * table[j] + 0x4000) >> 15;
	#else
	uint32_t test = divfn(i, j, 0);
	#endif
	int32_t e = (int32_t)(ref - test);
	if (e > 1)
	{
	(void)divfn(i, j, j);
	printf("%d/%d==%u (0x%x) (not %u (0x%x)) error: %d\n", i, j, ref, ref, test, test, e);
	}
	err += abs(e);

	}
	}
	return 0;
	}