Created
January 20, 2012 21:28
-
-
Save wspringer/1649700 to your computer and use it in GitHub Desktop.
LZW Encoding *and Decoding* in Scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object lzw { | |
trait Appendable[T] { | |
def append(value: T) | |
} | |
import scala.collection.generic._ | |
case class GrowingAppendable[T](growable: Growable[T]) extends Appendable[T] { | |
def append(value: T) { | |
growable += value | |
} | |
} | |
trait Node { | |
def encode(b: Byte, out: Appendable[Int]): Node | |
def decode(i: Int, out: Appendable[Byte]): Node | |
def terminate(out: Appendable[Int]): Unit | |
} | |
class ValueNode(value: Int, rootNode: RootNode, val path: Array[Byte]) extends Node { | |
private val nodes = new Array[Node](255) | |
def encode(b: Byte, out: Appendable[Int]): Node = Option(nodes(0xff & b)).getOrElse { | |
out.append(value) | |
rootNode.createNode(path :+ b) match { | |
case Some(node) => | |
nodes(0xff & b) = node | |
rootNode.encode(b, out) | |
case None => | |
rootNode.encode(b, out) | |
} | |
} | |
def decode(i: Int, out: Appendable[Byte]): Node = { | |
val node = rootNode.nodeByValue(i).get | |
val firstByte = node.path.head | |
for (b <- node.path) out.append(b) | |
if (nodes(0xff & firstByte) == null) { | |
val next = rootNode.createNode(path :+ firstByte).get | |
nodes(0xff & firstByte) = next | |
} | |
node | |
} | |
def terminate(out: Appendable[Int]) { | |
out.append(value) | |
} | |
override def toString = "ValueNode(" + value + ")" | |
} | |
class RootNode(limit: Int = 512) extends Node { | |
private var index = 256 | |
private val initial = Array.tabulate[ValueNode](256)(b => new ValueNode(b, this, Array[Byte](b.asInstanceOf[Byte]))) | |
private val createdNodes = new Array[ValueNode](limit - 256) | |
def encode(b: Byte, out: Appendable[Int]): Node = initial(0xff & b) | |
def decode(i: Int, out: Appendable[Byte]): Node = { | |
val node = nodeByValue(i).get | |
for (b <- node.path) out.append(b) | |
node | |
} | |
def createNode(path: Array[Byte]): Option[Node] = { | |
if (index <= limit) { | |
val node = new ValueNode(index, this, path) | |
createdNodes(index - 256) = node | |
index += 1 | |
Some(node) | |
} else { | |
// In this particular implementation, there is no reset (yet) | |
None | |
} | |
} | |
def nodeByValue(value: Int): Option[ValueNode] = | |
// This method should use asserts rather than options | |
if (value < 256) Some(initial(value)) | |
else if (value > index) None | |
else Some(createdNodes(value - 256)) | |
def terminate(out: Appendable[Int]) { } | |
override def toString = "RootNode" | |
} | |
implicit def growableToAppendable[T](growable: Growable[T]) = new GrowingAppendable(growable) | |
implicit def stringToByteIterable(str: String) = str.getBytes.toIterable | |
def encode[F <% Iterable[Byte], T <% Appendable[Int]](in: F, out: T) = | |
in.foldLeft(new RootNode().asInstanceOf[Node])({ (node, b) => node.encode(b, out) }).terminate(out) | |
def decode[F <% Iterable[Int], T <% Appendable[Byte]](in: F, out: T) = | |
in.foldLeft(new RootNode().asInstanceOf[Node])({ (node, i) => node.decode(i, out) }) | |
def encode[F <% Iterable[Byte]](in: F): Seq[Int] = { | |
val buffer = new scala.collection.mutable.ArrayBuffer[Int] | |
encode(in, buffer) | |
buffer | |
} | |
def decode[F <% Iterable[Int]](in: F): Seq[Byte] = { | |
val buffer = new scala.collection.mutable.ArrayBuffer[Byte] | |
decode(in, buffer) | |
buffer | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Oh, and to LZW encode the String "blablablablablablabla":