Created
June 22, 2012 09:37
Revisions
-
rjeschke created this gist
Jun 22, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,407 @@ /* * Copyright (C) 2012 René Jeschke <[email protected]> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jfloat; import java.math.BigDecimal; import java.math.BigInteger; /* This thing is WIP. Currently basic mul/div is working, without * proper rounding, though. Everything is quick'n'dirty ... * * Proof-of-concept and playground. * * 128 Bit: (4 * UI32) * 1 sign * 15 bit exponent, bias: 16383 * 112 mantissa */ public class Float128 { private final static BigDecimal BD_2 = new BigDecimal(2); private final static BigDecimal MAN_DIV = new BigDecimal(new BigInteger("10000000000000000000000000000", 16)); public static int getSignBit(int[] num) { return num[0] & 0x80000000; } public static int getExponent(int[] num) { return ((num[0] >> 16) & 0x7fff) - 16383; } private static void carryAdd4(int[] a, int carry) { int c = carry; if(c == 0) return; for(int i = 3; i >= 0; i--) { long t = (a[i] & 0xffffffffL) + c; a[i] = (int)t; c = (int)(t >> 32); if(c == 0) return; } } private static int getSetBit8(int t) { if((t & 0x80) != 0) return 8; if((t & 0x40) != 0) return 7; if((t & 0x20) != 0) return 6; if((t & 0x10) != 0) return 5; if((t & 0x08) != 0) return 4; if((t & 0x04) != 0) return 3; if((t & 0x02) != 0) return 2; if((t & 0x01) != 0) return 1; return 0; } private static int getSetBit(int t) { if(t == 0) return 0; if((t & 0xff000000) != 0) return getSetBit8(t >> 24) + 24; if((t & 0x00ff0000) != 0) return getSetBit8(t >> 16) + 16; if((t & 0x0000ff00) != 0) return getSetBit8(t >> 8) + 8; if((t & 0x000000ff) != 0) return getSetBit8(t); return 0; } public static int[] div(int[] a, int[] b, int[] r) { final int sign = (a[0] ^ b[0]) & 0x80000000; final int e0 = (a[0] >> 16) & 0x7fff; final int e1 = (b[0] >> 16) & 0x7fff; // We flush denormals automatically if(e0 == 0) { r[0] = r[1] = r[2] = r[3] = 0; return r; } // For now we throw up if(e1 == 0) { throw new ArithmeticException("The mighty Neet did not yet define division by zero"); } // k is either 7 or 8 (2^k bits of precision, so 7 should be enough) // N = mant(a) >> 1 // D = mand(b) >> 1 // e = 1 - D // Q = N // for(i=0, k-1) // Q = Q + Q * e // e = e * e // end int[] e = new int[4]; int[] q = new int[4]; // 8.120 bit fixed point e[0] = ((((b[0] & 0xffff) | 0x10000) << 7) | (b[1] >>> 25)) ^ 0xffffff; e[1] = ((b[1] << 7) | (b[2] >>> 25)) ^ -1; e[2] = ((b[2] << 7) | (b[3] >>> 25)) ^ -1; e[3] = (b[3] << 7) ^ -1; carryAdd4(e, 1); q[0] = (((a[0] & 0xffff) | 0x10000) << 7) | (a[1] >>> 25); q[1] = (a[1] << 7) | (a[2] >>> 25); q[2] = (a[2] << 7) | (a[3] >>> 25); q[3] = (a[3] << 7); int[] ret = new int[8]; for(int n = 0; n < 7; n++) { long t; mul8_120(q, e, ret); ret[0] = (ret[0] << 8) | (ret[1] >>> 24); ret[1] = (ret[1] << 8) | (ret[2] >>> 24); ret[2] = (ret[2] << 8) | (ret[3] >>> 24); ret[3] = (ret[3] << 8) | (ret[4] >>> 24); t = (q[3] & 0xffffffffL) + (ret[3] & 0xffffffffL); q[3] = (int)t; t = (q[2] & 0xffffffffL) + (ret[2] & 0xffffffffL) + (t >>> 32); q[2] = (int)t; t = (q[1] & 0xffffffffL) + (ret[1] & 0xffffffffL) + (t >>> 32); q[1] = (int)t; t = (q[0] & 0xffffffffL) + (ret[0] & 0xffffffffL) + (t >>> 32); q[0] = (int)t; mul8_120(e, e, ret); e[0] = (ret[0] << 8) | (ret[1] >>> 24); e[1] = (ret[1] << 8) | (ret[2] >>> 24); e[2] = (ret[2] << 8) | (ret[3] >>> 24); e[3] = (ret[3] << 8) | (ret[4] >>> 24); } final int sb = getSetBit(q[0]); final int exp = e0 - e1 + sb + 16383 - 25; if(exp <= 0) { r[0] = r[1] = r[2] = r[3] = 0; return r; } // TODO +/- inf final int s0 = sb - 17; final int s1 = 32 - s0; r[0] = sign | (exp << 16) | ((q[0] >> s0) & 0xffff); r[1] = (q[0] << s1) | (q[1] >>> s0); r[2] = (q[1] << s1) | (q[2] >>> s0); r[3] = (q[2] << s1) | (q[3] >>> s0); return r; } public static int[] mul8_120(int[] a, int[] b, int[] ret) { long t, carry, sum; t = (a[3] & 0xffffffffL) * (b[3] & 0xffffffffL); carry = t >>> 32; ret[7] = (int)t; t = (a[2] & 0xffffffffL) * (b[3] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[3] & 0xffffffffL) * (b[2] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; carry += sum >>> 32; ret[6] = (int)sum; t = (a[1] & 0xffffffffL) * (b[3] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[2] & 0xffffffffL) * (b[2] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[3] & 0xffffffffL) * (b[1] & 0xffffffffL); sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[5] = (int)sum; t = (a[0] & 0xffffffffL) * (b[3] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[1] & 0xffffffffL) * (b[2] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[2] & 0xffffffffL) * (b[1] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[3] & 0xffffffffL) * (b[0] & 0xffffffffL); sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[4] = (int)sum; t = (a[0] & 0xffffffffL) * (b[2] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[1] & 0xffffffffL) * (b[1] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[2] & 0xffffffffL) * (b[0] & 0xffffffffL); sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[3] = (int)sum; t = (a[0] & 0xffffffffL) * (b[1] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[1] & 0xffffffffL) * (b[0] & 0xffffffffL); sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[2] = (int)sum; t = (a[0] & 0xffffffffL) * (b[0] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; ret[1] = (int)sum; ret[0] = (int)((t >>> 32) + (sum >>> 32)); return ret; } public static int[] mul(int[] a, int[] b, int[] r) { final int sign = (a[0] ^ b[0]) & 0x80000000; final int e0 = (a[0] >> 16) & 0x7fff; final int e1 = (b[0] >> 16) & 0x7fff; // We flush denormals automatically if(e0 == 0 || e1 == 0) { r[0] = r[1] = r[2] = r[3] = 0; return r; } final int[] temp = mantMul(a, b, new int[8]); final int s = (temp[0] >> 1); final int exp = e0 + e1 - 16383 + s; if(exp <= 0) { r[0] = r[1] = r[2] = r[3] = 0; return r; } // TODO +/- inf final int s0 = 16 - s; final int s1 = 32 - s0; r[0] = sign | (exp << 16) | (((temp[0] << s0) | (temp[1] >>> s1)) & 0xffff); r[1] = (temp[1] << s0) | (temp[2] >>> s1); r[2] = (temp[2] << s0) | (temp[3] >>> s1); r[3] = (temp[3] << s0) | (temp[4] >>> s1); return r; } private static int[] mantMul(int[] a, int[] b, int[] ret) { long t, carry, sum; final int a0 = (a[0] & 0xffff) | 0x10000; final int b0 = (b[0] & 0xffff) | 0x10000; t = (a[3] & 0xffffffffL) * (b[3] & 0xffffffffL); carry = t >>> 32; ret[7] = (int)t; t = (a[2] & 0xffffffffL) * (b[3] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[3] & 0xffffffffL) * (b[2] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; carry += sum >>> 32; ret[6] = (int)sum; t = (a[1] & 0xffffffffL) * (b[3] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[2] & 0xffffffffL) * (b[2] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[3] & 0xffffffffL) * (b[1] & 0xffffffffL); sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[5] = (int)sum; t = a0 * (b[3] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[1] & 0xffffffffL) * (b[2] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[2] & 0xffffffffL) * (b[1] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[3] & 0xffffffffL) * b0; sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[4] = (int)sum; t = a0 * (b[2] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[1] & 0xffffffffL) * (b[1] & 0xffffffffL); carry += t >>> 32; sum += t & 0xffffffffL; t = (a[2] & 0xffffffffL) * b0; sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[3] = (int)sum; t = a0 * (b[1] & 0xffffffffL); sum = (t & 0xffffffffL) + carry; carry = t >>> 32; t = (a[1] & 0xffffffffL) * b0; sum += t & 0xffffffffL; carry += (t >>> 32) + (sum >>> 32); ret[2] = (int)sum; t = (long)a0 * (long)b0; sum = (t & 0xffffffffL) + carry; ret[1] = (int)sum; ret[0] = (int)((t >>> 32) + (sum >>> 32)); return ret; } public static int[] fromDouble(double v, int[] n) { long bits = Double.doubleToRawLongBits(v); int sign = (int)((bits >> 63) & 1); int exp = ((int)(bits >> 52) & 2047) - 1023; int m0 = (int)(bits >> 36) & 0xffff; int m1 = (int)(bits >> 4); int m2 = (int)(bits & 15) << 28; n[0] = (sign << 31) | ((exp + 16383) << 16) | m0; n[1] = m1; n[2] = m2; n[3] = 0; return n; } private static String mantissaToHex(int[] n) { return String.format(n[0] < 0 ? "-%x%08x%08x%08x" : "%x%08x%08x%08x", (n[0] & 0xffff) | 0x10000, n[1], n[2], n[3]); } public static BigDecimal toBigDecimal(int[] n) { final BigDecimal mant = new BigDecimal(new BigInteger(mantissaToHex(n), 16)).divide(MAN_DIV); int exp = getExponent(n); final BigDecimal em = exp < 0 ? BigDecimal.ONE.divide(BD_2.pow(-exp)) : BD_2.pow(exp); return mant.multiply(em); } public static String toString(int[] n) { if((n[0] & 0x7fff0000) == 0) return "0"; return toBigDecimal(n).toString(); } public static String toHexString(int[] n) { StringBuilder sb = new StringBuilder(); for(int i = 0; i < n.length; i++) sb.append(String.format(i == 0 ? "%08x" : "%08x", n[i])); return sb.toString(); } }