Last active
January 2, 2024 11:14
-
-
Save kprotty/08e4fefaa73186da3b573c1395689354 to your computer and use it in GitHub Desktop.
Simple LZ4 block enc/dec in 100 LOC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const assert = @import("std").debug.assert; | |
fn compressBlock(writer: anytype, src: []const u8) !void { | |
var table = [_]u32{0} ** 4096; // size is pow2. bump to match more. ideal = (0xffff+1) / sizeof(u32) | |
var anchor: u32 = 0; | |
if (src.len > 12) { // LZ4 spec restriction: last match must start 12b before end of block. | |
var pos: u32 = 0; | |
while (pos + 4 < src.len - 5) { // LZ4 spec restriction: last 5b are always literal. | |
const blk: u32 = @bitCast(src[pos..][0..4].*); | |
const hash = (blk *% 0x9e3779b9) >> (32 - @ctz(table.len)); | |
const match = table[hash] -% 1; | |
table[hash] = pos +% 1; | |
const offset = pos -% match; | |
if (match == ~@as(u32, 0) or offset > 0xffff or blk != @as(u32, @bitCast(src[match..][0..4].*))) { | |
pos += 1; | |
continue; | |
} | |
var matches: u32 = 4; | |
while (pos + matches < src.len - 5 and src[pos + matches] == src[match + matches]) | |
matches += 1; | |
const literals = pos - anchor; | |
pos += matches; | |
matches -= 4; | |
const token = ((if (literals < 0xf) literals else 0xf) << 4) | (if (matches < 0xf) matches else 0xf); | |
try writer.writeByte(@intCast(token)); | |
if (literals >= 0xf) { | |
var n = literals - 0xf; | |
while (n >= 0xff) : (n -= 0xff) try writer.writeByte(0xff); | |
try writer.writeByte(@intCast(n)); | |
} | |
try writer.writeAll(src[anchor..][0..literals]); | |
anchor = pos; | |
try writer.writeInt(u16, @intCast(offset), .little); | |
if (matches >= 0xf) { | |
var n = matches - 0xf; | |
while (n >= 0xff) : (n -= 0xff) try writer.writeByte(0xff); | |
try writer.writeByte(@intCast(n)); | |
} | |
} | |
} | |
const literals = src.len - anchor; | |
if (anchor == 0) return error.Uncompressable; | |
try writer.writeByte(@intCast((if (literals < 0xf) literals else 0xf) << 4)); | |
if (literals >= 0xf) { | |
var n = literals - 0xf; | |
while (n >= 0xff) : (n -= 0xff) try writer.writeByte(0xff); | |
try writer.writeByte(@intCast(n)); | |
} | |
assert(anchor < src.len); | |
try writer.writeAll(src[anchor..]); | |
} | |
fn decompressBlock(dst: []u8, reader: anytype) ![]u8 { | |
var pos: u32 = 0; | |
while (true) { | |
const token: u32 = try reader.readByte(); | |
assert(pos < dst.len); | |
var literals = token >> 4; | |
while (literals >= 0xf) { | |
const n = try reader.readByte(); | |
literals += n; | |
if (n != 0xff) break; | |
} | |
try reader.readNoEof(dst[pos..][0..literals]); | |
pos += literals; | |
var matches = token & 0xf; | |
var offset: u32 = reader.readInt(u16, .little) catch |err| switch (err) { | |
error.EndOfStream => { // last literals token has no offset. | |
assert(matches == 0); | |
return dst[0..pos]; | |
}, | |
else => |e| return e, | |
}; | |
assert(offset > 0); | |
offset = pos - offset; | |
while (matches >= 0xf) { | |
const n = try reader.readByte(); | |
matches += n; | |
if (n != 0xff) break; | |
} | |
matches += 4; | |
for (0..matches) |i| dst[pos + i] = dst[offset + i]; // TODO: optimize this. | |
pos += matches; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment