Skip to content

Instantly share code, notes, and snippets.

@bluwy
Last active January 1, 2025 14:51
Show Gist options
  • Save bluwy/4e8c411c3a3f2024394d1e8e423b6776 to your computer and use it in GitHub Desktop.
Save bluwy/4e8c411c3a3f2024394d1e8e423b6776 to your computer and use it in GitHub Desktop.
Performance comparison of Node.js Buffer vs ArrayBuffer for parsing tar files
┌─────────┬──────────────────────┬──────────────────────┬─────────────────────┬────────────────────────────┬───────────────────────────┬─────────┐
│ (index) │ Task name │ Latency average (ns) │ Latency median (ns) │ Throughput average (ops/s) │ Throughput median (ops/s) │ Samples │
├─────────┼──────────────────────┼──────────────────────┼─────────────────────┼────────────────────────────┼───────────────────────────┼─────────┤
│ 0 │ 'buffer' │ '53789.86 ± 0.14%' │ '52708.03' │ '18746 ± 0.06%' │ '18972' │ 37182 │
│ 1 │ 'array buffer' │ '39104.97 ± 0.74%' │ '36292.02' │ '26764 ± 0.08%' │ '27554' │ 51145 │
│ 2 │ 'array buffer extra' │ '37756.63 ± 0.25%' │ '36250.00' │ '27002 ± 0.07%' │ '27586' │ 52971 │
└─────────┴──────────────────────┴──────────────────────┴─────────────────────┴────────────────────────────┴───────────────────────────┴─────────┘
import { readFileSync } from 'fs'
import { Bench } from 'tinybench'
import zlib from 'zlib'
import util from 'util'
// Promise-returning wrapper around the callback-style zlib.gunzip
const gunzip = util.promisify(zlib.gunzip)
const tgzNodeBuffer = readFileSync('./@babel__core-7.26.0.tgz')
// Decompress once up front; the benchmark tasks below reuse these bytes
const buffer = await gunzip(tgzNodeBuffer)
// A Node Buffer may be a window into a larger (pooled or over-allocated)
// ArrayBuffer, so `buffer.buffer` on its own can start at the wrong byte or
// carry unrelated trailing data. Copy out exactly the range this Buffer covers.
const arrayBuffer = buffer.buffer.slice(
  buffer.byteOffset,
  buffer.byteOffset + buffer.byteLength
)
// Benchmark runner; `time` and `warmupTime` are passed straight to tinybench
const bench = new Bench({
  time: 2000,
  warmupTime: 500,
})

// Every task parses the same pre-decompressed tarball, so only the tar walking
// is compared. The commented-out first line of each task is the alternative
// set-up that would decompress inside the measured function instead.
bench
  .add('buffer', async () => {
    // const buffer = await gunzip(tgzNodeBuffer)
    unpackWithBuffer(buffer)
  })
  .add('array buffer', async () => {
    // const arrayBuffer = (await gunzip(tgzNodeBuffer)).buffer
    unpackWithArrayBuffer(arrayBuffer)
  })
  .add('array buffer extra', async () => {
    // const arrayBuffer = (await gunzip(tgzNodeBuffer)).buffer
    // Must call the "Extra" variant (names + file contents); calling
    // unpackWithArrayBuffer here would just repeat the previous task.
    unpackWithArrayBufferExtra(arrayBuffer)
  })

await bench.run()
console.table(bench.table())
/**
 * Walk an uncompressed tar archive held in a Node.js Buffer and list the
 * names of its regular-file entries.
 *
 * Only entries whose type flag is '0' are reported; every other entry type
 * (e.g. PAX 'x' / 'g' headers) is stepped over. Walking stops at the first
 * header whose type flag is a NUL byte, or at the end of the data.
 *
 * @param {Buffer} content raw tar bytes
 * @returns {string[]} entry names, in archive order
 */
function unpackWithBuffer(content) {
  // Tar headers and the padding unit for entry data are both 512 bytes
  const BLOCK = 512
  /** @type {string[]} */
  const names = []

  // Decode `length` bytes located `start` bytes into the header at `base`
  const field = (base, start, length) =>
    content.subarray(base + start, base + start + length).toString()

  for (let pos = 0; pos < content.length; ) {
    // Type flag: 1 byte at header offset 156
    const typeFlag = field(pos, 156, 1)
    // A NUL type flag marks the empty blocks that terminate the archive
    if (typeFlag === '\0') break

    if (typeFlag === '0') {
      // Name: 100 bytes at header offset 0, NUL-terminated when shorter
      const rawName = field(pos, 0, 100)
      const nul = rawName.indexOf('\0')
      names.push(nul === -1 ? rawName : rawName.slice(0, nul))
    }

    // Size: 12 bytes of octal text at header offset 124
    const byteSize = parseInt(field(pos, 124, 12), 8)
    // Step over the header plus the entry data rounded up to whole blocks
    pos += BLOCK * (1 + Math.ceil(byteSize / BLOCK))
  }

  return names
}
/**
 * Walk an uncompressed tar archive held in an ArrayBuffer and list the names
 * of its regular-file entries.
 *
 * Only entries whose type flag is '0' are reported; every other entry type
 * (e.g. PAX 'x' / 'g' headers) is stepped over. Walking stops at the first
 * header whose type flag is a NUL byte, or at the end of the data.
 *
 * @param {ArrayBuffer} tarball raw tar bytes
 * @returns {string[]} entry names, in archive order
 */
export function unpackWithArrayBuffer(tarball) {
  // Tar headers and the padding unit for entry data are both 512 bytes
  const BLOCK = 512
  const textDecoder = new TextDecoder()
  const names = []

  for (let pos = 0; pos < tarball.byteLength; ) {
    // Type flag: 1 byte at header offset 156
    const typeFlag = read(tarball, textDecoder, pos + 156, 1)
    // A NUL type flag marks the empty blocks that terminate the archive
    if (typeFlag === '\0') break

    // Size: 12 bytes of octal text at header offset 124
    const octalSize = read(tarball, textDecoder, pos + 124, 12)
    const byteSize = parseInt(octalSize, 8)

    if (typeFlag === '0') {
      // Name: 100 bytes at header offset 0, NUL-terminated when shorter
      const rawName = read(tarball, textDecoder, pos, 100)
      const nul = rawName.indexOf('\0')
      names.push(nul === -1 ? rawName : rawName.slice(0, nul))
    }

    // Step over the header plus the entry data rounded up to whole blocks
    pos += BLOCK * (1 + Math.ceil(byteSize / BLOCK))
  }

  return names
}
/**
 * Walk an uncompressed tar archive held in an ArrayBuffer and collect its
 * regular-file entries together with their contents.
 *
 * Only entries whose type flag is '0' are reported; every other entry type
 * (e.g. PAX 'x' / 'g' headers) is stepped over. Walking stops at the first
 * header whose type flag is a NUL byte, or at the end of the data.
 *
 * @param {ArrayBuffer} tarball raw tar bytes
 * @returns {{ name: string, data: Uint8Array }[]} entries in archive order;
 *   each `data` is a view onto `tarball`, not a copy
 */
export function unpackWithArrayBufferExtra(tarball) {
  // Tar headers and the padding unit for entry data are both 512 bytes
  const BLOCK = 512
  const textDecoder = new TextDecoder()
  const entries = []

  for (let pos = 0; pos < tarball.byteLength; ) {
    // Type flag: 1 byte at header offset 156
    const typeFlag = read(tarball, textDecoder, pos + 156, 1)
    // A NUL type flag marks the empty blocks that terminate the archive
    if (typeFlag === '\0') break

    // Size: 12 bytes of octal text at header offset 124
    const octalSize = read(tarball, textDecoder, pos + 124, 12)
    const byteSize = parseInt(octalSize, 8)

    if (typeFlag === '0') {
      // Name: 100 bytes at header offset 0, NUL-terminated when shorter
      const rawName = read(tarball, textDecoder, pos, 100)
      const nul = rawName.indexOf('\0')
      const name = nul === -1 ? rawName : rawName.slice(0, nul)
      // Contents begin right after the 512-byte header and span `byteSize` bytes
      const data = new Uint8Array(tarball, pos + BLOCK, byteSize)
      entries.push({ name, data })
    }

    // Step over the header plus the entry data rounded up to whole blocks
    pos += BLOCK * (1 + Math.ceil(byteSize / BLOCK))
  }

  return entries
}
/**
 * Decode `length` bytes of `buffer`, starting at `offset`, into a string.
 *
 * Accepts either a plain ArrayBuffer or any view onto one (typed array,
 * DataView, Node.js Buffer). For a view, `offset` is counted from the view's
 * own first byte, so views that start part-way into their backing
 * ArrayBuffer are read correctly.
 *
 * @param {ArrayBuffer | ArrayBufferView} buffer bytes to read from
 * @param {TextDecoder} decoder decoder used to turn the bytes into text
 * @param {number} offset index of the first byte to decode
 * @param {number} length number of bytes to decode
 * @returns {string} the decoded text
 * @throws {RangeError} if the requested range does not fit inside `buffer`
 */
function read(buffer, decoder, offset, length) {
  if (ArrayBuffer.isView(buffer)) {
    // `new Uint8Array(view, offset, length)` would ignore offset/length and
    // copy the whole view, so address the backing ArrayBuffer explicitly and
    // keep the range inside the view's own bounds.
    if (
      !(offset >= 0) ||
      !(length >= 0) ||
      offset + length > buffer.byteLength
    ) {
      throw new RangeError(
        `Cannot read ${length} bytes at offset ${offset} from a view of ${buffer.byteLength} bytes`
      )
    }
    return decoder.decode(
      new Uint8Array(buffer.buffer, buffer.byteOffset + offset, length)
    )
  }
  // Plain ArrayBuffer: the typed-array constructor itself rejects bad ranges
  const view = new Uint8Array(buffer, offset, length)
  return decoder.decode(view)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment