Last active
August 25, 2017 06:40
-
-
Save oertl/60d52c9d308fecb5dc9d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class HighDynamicRangeQuantile { | |
private final long[] counts; | |
private double minimum = Double.POSITIVE_INFINITY; | |
private double maximum = Double.NEGATIVE_INFINITY; | |
private long underFlowCount = 0; | |
private long overFlowCount = 0; | |
private final double factor; | |
private final double offset; | |
private final double minExpectedQuantileValue; | |
public HighDynamicRangeQuantile(final double minExpectedQuantileValue, final double maxExpectedQuantileValue, final double maxRelativeError) { | |
assert minExpectedQuantileValue >= Double.MIN_NORMAL; // bit operations below are not correct for subnormals | |
// TODO range checks | |
this.minExpectedQuantileValue = minExpectedQuantileValue; | |
factor = 0.25/Math.log(maxRelativeError); | |
offset = getIndexHelper(minExpectedQuantileValue); | |
final int arraySize = getIndex(maxExpectedQuantileValue) + 1; | |
counts = new long[arraySize]; | |
} | |
int getIndex(final double value) { | |
return (int) (getIndexHelper(value) - offset); | |
} | |
double getIndexHelper(final double value) { | |
final long valueBits = Double.doubleToRawLongBits(value); | |
final long exponent = (valueBits & 0x7ff0000000000000L) >>> 52; | |
final double exponentMul3 = exponent + (exponent << 1); | |
final double mantissaPlus1 = Double.longBitsToDouble((valueBits & 0x800fffffffffffffL) | 0x3ff0000000000000L); | |
return factor*((mantissaPlus1-1.)*(5.-mantissaPlus1)+exponentMul3); | |
} | |
public void add(final double value, final long count) { | |
if (value >= minExpectedQuantileValue) { | |
final int idx = getIndex(value); | |
if (idx < counts.length) { | |
counts[idx] += count; | |
} | |
else { | |
overFlowCount += count; | |
} | |
} | |
else { | |
underFlowCount += count; | |
} | |
minimum = Math.min(minimum, value); | |
maximum = Math.max(maximum, value); | |
} | |
public long getCount() { | |
long sum = underFlowCount + overFlowCount; | |
for (final long c : counts) { | |
sum += c; | |
} | |
return sum; | |
} | |
} |
My previous comment was ill thought out. The offset is a double which allows the code to work as intended.
I have some simplifications that improve speed by a few percent and also improve accuracy.
I will send a pull request.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This idea here allows the first bucket to have a smaller range than expected. If you divide the incoming sample by the minimumExpectedValue instead of using a bucket offset, you guarantee that the left boundary of the first bucket is at exactly minimumExpectedValue. That seems preferable to me.