Last active
December 10, 2015 17:18
-
-
Save zhaoyao/4467213 to your computer and use it in GitHub Desktop.
Huffman Encoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package snip; | |
import java.io.EOFException; | |
import java.util.Arrays; | |
import java.util.PriorityQueue; | |
/** | |
* User: zhaoyao | |
* Date: 1/5/13 | |
* Time: 10:53 PM | |
*/ | |
public class Huffman { | |
static class HuffmanCode { | |
int code; | |
int nbits; | |
HuffmanCode(int code, int nbits) { | |
this.code = code; | |
this.nbits = nbits; | |
} | |
@Override | |
public String toString() { | |
return "HuffmanCode{" + | |
"code=" + Integer.toBinaryString(code) + | |
", nbits=" + nbits + | |
'}'; | |
} | |
} | |
static class HuffmanNode implements Comparable<HuffmanNode> { | |
byte sym; | |
int freq; | |
HuffmanNode left; | |
HuffmanNode right; | |
HuffmanNode(HuffmanNode left, HuffmanNode right) { | |
this.freq = left.freq + right.freq; | |
this.left = left; | |
this.right = right; | |
} | |
HuffmanNode(byte sym, int freq) { | |
this.sym = sym; | |
this.freq = freq; | |
} | |
@Override | |
public int compareTo(HuffmanNode huffmanNode) { | |
return this.freq - huffmanNode.freq; | |
} | |
public String toString() { | |
if (left == null && right == null) { | |
return "<" + sym + ":" + freq + ">"; | |
} else { | |
return "<" + freq + ":(" + left + "), (" + right + ")>"; | |
} | |
} | |
@Override | |
public boolean equals(Object o) { | |
if (this == o) return true; | |
if (o == null || getClass() != o.getClass()) return false; | |
HuffmanNode node = (HuffmanNode) o; | |
if (left == null || right == null) { | |
return node.left == null || node.right == null; | |
} | |
return left.equals(node.left) && right.equals(node.right); | |
} | |
} | |
public static byte[] compress(byte[] data) throws Exception { | |
int[] freqs = new int[256]; // byte range (-127 - 128) | |
HuffmanCode[] table = new HuffmanCode[256]; | |
int nbytes = data.length; | |
for (byte b : data) { | |
int p = b + 128; // transform the negative byte | |
freqs[p]++; | |
} | |
//在 huffman header中的freq table 需要使用一个byte存储符号的频率 | |
//于是需要将所有的频率等比例缩放到0-255(UCHAR_MAX, 0xff)能容纳的大小 | |
int max = 255; | |
for (int freq : freqs) { | |
if (freq > max) { | |
max = freq; | |
} | |
} | |
for (int i = 0; i < freqs.length; i++) { | |
int freq = freqs[i]; | |
int scale = (int) (freq / ((double) max / (double) 255)); | |
if (scale == 0 && freq != 0) { | |
freqs[i] = 1; | |
} else { | |
freqs[i] = scale; | |
} | |
} | |
HuffmanNode root = buildTree(freqs); | |
buildTable(root, table, 0, (short) 0); | |
byte[] compressed = new byte[260 + data.length]; //4bytes length + 256 bytes freq table | |
int offset = 0; | |
compressed[offset++] = (byte) ((nbytes >> 24) & 0xff); | |
compressed[offset++] = (byte) ((nbytes >> 16) & 0xff); | |
compressed[offset++] = (byte) ((nbytes >> 8) & 0xff); | |
compressed[offset++] = (byte) ((nbytes) & 0xff); | |
for (int freq : freqs) { | |
compressed[offset++] = (byte) freq; | |
} | |
int bitsWrite = offset * 8; | |
for (byte b : data) { | |
int c = b + 128; | |
if (table[c] == null) { | |
throw new IllegalStateException("huffman table " + c + " is null"); | |
} | |
int code = table[c].code; | |
for (int i = 0; i < table[c].nbits; i++) { | |
int nb = bitsWrite / 8; | |
//huffman code从int的低bit开始 | |
int k = Integer.SIZE - table[c].nbits; | |
boolean flag = bitGet(code, k + i); | |
compressed[nb] = bitSet(compressed[nb], flag, bitsWrite % 8); | |
bitsWrite++; | |
} | |
} | |
int compressedLen = bitsWrite % 8 == 0 ? bitsWrite / 8 : bitsWrite / 8 + 1; | |
return Arrays.copyOfRange(compressed, 0, compressedLen); | |
} | |
public static byte[] decompress(byte[] compressed) throws EOFException { | |
int offset = 0; | |
int originalLen = (compressed[offset++] & 0xff) << 24 | | |
(compressed[offset++] & 0xff) << 16 | | |
(compressed[offset++] & 0xff) << 8 | | |
(compressed[offset++] & 0xff); | |
int[] freqs = new int[256]; | |
for (int i = 0; i < freqs.length; i++) { | |
freqs[i] = compressed[offset++] & 0xff; | |
} | |
HuffmanNode root = buildTree(freqs); | |
byte[] result = new byte[originalLen]; | |
int bitsRead = offset * 8; | |
int i = 0; | |
while (i < originalLen) { | |
HuffmanNode node = root; | |
for (; ; ) { | |
if (bitsRead / 8 >= compressed.length) { | |
throw new EOFException(); | |
} | |
byte b = compressed[bitsRead / 8]; | |
if (bitGet(b, 24 + bitsRead % 8)) { //code只占用最后8位,所以从24位开始算起 | |
node = node.right; | |
} else { | |
node = node.left; | |
} | |
bitsRead++; | |
if (node.left == null && node.right == null) { | |
result[i++] = node.sym; | |
break; | |
} | |
} | |
} | |
return result; | |
} | |
private static boolean bitGet(int val, int p) { | |
int mask = 0x80000000; | |
while (p-- > 0) { | |
mask >>>= 1; | |
} | |
return (val & mask) == mask; | |
} | |
private static byte bitSet(byte val, boolean flag, int i) { | |
int mask = 0x80; | |
if (i >= Byte.SIZE) { | |
throw new IllegalArgumentException("bit offset overflow: " + i); | |
} | |
while (i-- > 0) { | |
mask >>>= 1; | |
} | |
return (byte) (flag ? val | mask : val & (~mask)); | |
} | |
private static void buildTable(HuffmanNode node, HuffmanCode[] table, int code, int nbits) { | |
if (node.left != null) { | |
buildTable(node.left, table, code << 1, (nbits + 1)); | |
} | |
if (node.right != null) { | |
buildTable(node.right, table, code << 1 | 0x0001, (nbits + 1)); | |
} | |
if (node.left == null && node.right == null) { | |
table[node.sym + 128] = new HuffmanCode(code, nbits); | |
} | |
} | |
private static HuffmanNode buildTree(int[] freqs) { | |
PriorityQueue<HuffmanNode> pq = new PriorityQueue<HuffmanNode>(); | |
for (int i = 0; i < freqs.length; i++) { | |
int freq = freqs[i]; | |
if (freq != 0) { | |
pq.add(new HuffmanNode((byte) (i - 128), freq)); | |
} | |
} | |
while (!pq.isEmpty()) { | |
if (pq.size() >= 2) { | |
HuffmanNode left = pq.poll(); | |
HuffmanNode right = pq.poll(); | |
pq.add(new HuffmanNode(left, right)); | |
} else { | |
break; | |
} | |
} | |
if (pq.size() != 1) { | |
throw new IllegalStateException("Unexpected huffman tree root: " + pq.size()); | |
} | |
return pq.poll(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment