Created
January 28, 2026 15:40
-
-
Save jerch/e2f7695b887228d9e703c53af23c0737 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import { InWasm, OutputMode, OutputType } from 'inwasm'; | |
/** Number of pixels converted per benchmark run (262144*16 = 4194304). */
const PIXELS = 262144*16;
/**
 * Packed RGB source buffer, 3 bytes per pixel.
 * Derived from PIXELS so the buffer sizes cannot drift apart (= 786432*16 bytes).
 */
const BYTES = new Uint8Array(PIXELS * 3);
/** RGBA destination buffer, 4 bytes per pixel (= 1048576*16 bytes). */
const DATA = new Uint8Array(PIXELS * 4);
// Fill the source with one repeated colour (R=50, G=100, B=200) so that every
// converted pixel has the same, easily verifiable value.
for (let i = 0; i < BYTES.length; i += 3) {
  BYTES[i] = 50;
  BYTES[i + 1] = 100;
  BYTES[i + 2] = 200;
}
/**
 * Baseline RGB -> RGBA conversion (reported as "original").
 *
 * Copies `PIXELS` pixels from the packed 3-byte `BYTES` buffer into the 4-byte
 * `DATA` buffer, one channel at a time. The strides and the alpha decision are
 * local constants that are consulted for every pixel; this is what separates
 * this variant from `m2`, which uses literals instead.
 *
 * @returns Elapsed time in milliseconds, measured with `Date.now()`.
 */
function m1(): number {
  const startedAt = Date.now();
  const srcStride = 3;
  const dstStride = 4;
  const sourceHasAlpha = false;
  let src = 0;
  let dst = 0;
  for (let pixel = 0; pixel < PIXELS; pixel++) {
    DATA[dst] = BYTES[src];         // red
    DATA[dst + 1] = BYTES[src + 1]; // green
    DATA[dst + 2] = BYTES[src + 2]; // blue
    // The flag is constant false, so alpha is always written as fully opaque.
    DATA[dst + 3] = sourceHasAlpha ? BYTES[src + 3] : 255;
    src += srcStride;
    dst += dstStride;
  }
  return Date.now() - startedAt;
}
/**
 * RGB -> RGBA conversion with literal strides (reported as "slightly optimized").
 *
 * Same byte-by-byte copy from `BYTES` to `DATA` as `m1`, but the strides (3 and 4)
 * and the opaque alpha value (255) are written as literals in the loop.
 *
 * @returns Elapsed time in milliseconds, measured with `Date.now()`.
 */
function m2() {
  const startedAt = Date.now();
  for (let pixel = 0, src = 0, dst = 0; pixel < PIXELS; pixel++, src += 3, dst += 4) {
    DATA[dst] = BYTES[src];         // red
    DATA[dst + 1] = BYTES[src + 1]; // green
    DATA[dst + 2] = BYTES[src + 2]; // blue
    DATA[dst + 3] = 255;            // alpha: always opaque
  }
  return Date.now() - startedAt;
}
/**
 * RGB -> RGBA conversion on 32-bit words (reported as "4 byte blocks").
 *
 * Reads three 32-bit words (12 bytes = 4 packed RGB pixels) from `BYTES` and
 * writes four 32-bit RGBA words to `DATA` per iteration, forcing alpha to 0xFF.
 *
 * Requirements visible from the code:
 * - the word arithmetic assumes little-endian byte order (R in the lowest byte);
 * - `PIXELS` is consumed in steps of 4, so it should be a multiple of 4;
 * - both buffers are viewed from byte 0 of their underlying ArrayBuffer.
 *
 * @returns Elapsed time in milliseconds, measured with `Date.now()`.
 */
function m3(): number {
  const start = Date.now();
  const bytes32 = new Uint32Array(BYTES.buffer);
  const data32 = new Uint32Array(DATA.buffer);
  let dstOffset = 0;
  let srcOffset = 0;
  for (let i = 0; i < PIXELS; i += 4) {
    const bloc1 = bytes32[srcOffset++]; // R0 G0 B0 R1
    const bloc2 = bytes32[srcOffset++]; // G1 B1 R2 G2
    const bloc3 = bytes32[srcOffset++]; // B2 R3 G3 B3
    data32[dstOffset++] = bloc1 | 0xFF000000;
    // The right shifts must be unsigned (>>>): a sign-propagating >> smears 1-bits
    // into the upper bytes whenever the word's top byte is >= 0x80, which would
    // turn the G/B channels OR-ed in from the next word into 0xFF.
    data32[dstOffset++] = (bloc1 >>> 24) | (bloc2 << 8) | 0xFF000000;
    data32[dstOffset++] = (bloc2 >>> 16) | (bloc3 << 16) | 0xFF000000;
    data32[dstOffset++] = (bloc3 >>> 8) | 0xFF000000;
  }
  const end = Date.now();
  return end - start;
}
/**
 * Verifies the last conversion result and resets the output buffer.
 *
 * Every 32-bit word of `DATA` must equal 0xFFC86432, i.e. the bytes
 * 50, 100, 200, 255 (R, G, B, A) in little-endian order. Afterwards `DATA`
 * is zeroed so the next variant cannot pass on stale data.
 *
 * @throws Error naming the first mismatching word and its index.
 */
function check() {
  const words = new Uint32Array(DATA.buffer);
  const firstBad = words.findIndex((word) => word !== 0xFFC86432);
  if (firstBad !== -1) {
    throw Error(`wrong value ${words[firstBad]} at index ${firstBad}`);
  }
  words.fill(0);
}
// Synchronous WebAssembly instance factory defined through InWasm. The embedded C
// function `interleave` expands packed 3-byte RGB pixels into 4-byte RGBA pixels
// with alpha forced to 0xFF, handling four pixels per loop iteration with 128-bit
// SIMD operations.
| const wasmInterleave = InWasm({ | |
| name: 'decode', | |
| type: OutputType.INSTANCE, | |
| mode: OutputMode.SYNC, | |
| srctype: 'Clang-C', | |
| exports: { | |
// Fixed-size memory: 500 WebAssembly pages, initial == maximum.
| memory: new WebAssembly.Memory({initial: 500, maximum: 500}), | |
// NOTE(review): presumably only a typing stub for the compiled C function of the
// same name - confirm against the inwasm documentation.
| interleave: (pixels: number) => 0, | |
| }, | |
| compile: { | |
// -msimd128 enables the wasm SIMD instructions used via wasm_simd128.h; the two
// -Wl switches are forwarded to the linker (stack size 0, stack placed first).
| switches: ['-msimd128', '-Wl,-z,stack-size=0', '-Wl,--stack-first'] | |
| }, | |
// C source of the kernel. The memory layout is hardcoded: RGB input is read from
// byte offset 1024 and RGBA output is written from byte offset 12583936
// (= 1024 + 786432*16, i.e. directly behind an input of BYTES.length bytes).
// Each iteration loads 16 bytes, keeps source bytes 0..11 as four RGB triplets
// (swizzle index 16 is out of range and yields 0 in the alpha lanes), ORs
// 0xFF000000 into every pixel and stores 16 bytes; src advances by 12, dst by 16.
// Since 16 bytes are loaded per 12 consumed, the last iteration reads 4 bytes past
// the end of the input; the swizzle discards them. The loop steps by 4 pixels, so
// a pixel count that is not a multiple of 4 is effectively rounded up.
| code: ` | |
| #include <wasm_simd128.h> | |
| int interleave(int pixels) { | |
| // hardcoded mem offsets for simplicity | |
| char *src = (char*) 1024; | |
| char *dst = (char*) 12583936; | |
| v128_t opaque = wasm_u32x4_splat(0xFF000000); | |
| v128_t picker = wasm_i8x16_const(0, 1, 2, 16, 3, 4, 5, 16, 6, 7, 8, 16, 9, 10, 11, 16); | |
| for (int i = 0; i < pixels; i += 4) { | |
| v128_t data = wasm_v128_load((v128_t *) src); | |
| // swizzle + or ist faster | |
| data = wasm_i8x16_swizzle(data, picker); | |
| data = wasm_v128_or(data, opaque); | |
| // shuffle runs slower | |
| //data = wasm_i8x16_shuffle(data, opaque, 0, 1, 2, 19, 3, 4, 5, 23, 6, 7, 8, 27, 9, 10, 11, 31); | |
| wasm_v128_store((v128_t *) dst, data); | |
| src += 12; | |
| dst += 16; | |
| } | |
| return 0; | |
| } | |
| ` | |
| }) | |
// Create the instance, view its exported memory as bytes, and copy the RGB test
// pattern to offset 1024, which is where the C code reads its input.
| const wasmInstance = wasmInterleave(); | |
| const wasmMemory = new Uint8Array(wasmInstance.exports.memory.buffer); | |
| wasmMemory.set(BYTES, 1024); | |
/**
 * RGB -> RGBA conversion through the wasm SIMD kernel (reported as "wasm simd").
 *
 * Calls the exported `interleave` function for `PIXELS` pixels and takes a view
 * of its output region inside wasm memory. The timer is stopped before that view
 * is copied into `DATA`, so the copy-out is not part of the reported time; the
 * input was already copied into wasm memory when the instance was set up.
 *
 * @returns Elapsed time in milliseconds, measured with `Date.now()`.
 */
function m4() {
  const startedAt = Date.now();
  wasmInstance.exports.interleave(PIXELS);
  // Output region used by the C code: starts at byte 12583936, 1048576*16 bytes long.
  const outputStart = 12583936;
  const outputView = wasmMemory.subarray(outputStart, outputStart + 1048576*16);
  const stoppedAt = Date.now();
  DATA.set(outputView);
  return stoppedAt - startedAt;
}
// Run every converter once and validate its output before any measurement;
// check() throws on a wrong pixel and zeroes DATA for the next converter.
for (const convert of [m1, m2, m3, m4]) {
  convert();
  check();
}
/**
 * Runs a benchmark function several times and logs its trimmed mean runtime
 * together with the resulting throughput.
 *
 * With three or more runs, the single slowest and the single fastest run are
 * dropped before averaging. With one or two runs there is nothing sensible to
 * trim, so the plain mean is used (previously two runs divided by zero).
 *
 * @param func Benchmark to execute; must return its own runtime in milliseconds.
 * @param msg Label printed in front of the result.
 * @param runs Number of executions, a positive integer (default 22).
 * @throws RangeError if `runs` is not a positive integer.
 */
function measure(func: () => number, msg: string, runs: number = 22) {
  if (!Number.isInteger(runs) || runs < 1) {
    throw new RangeError(`runs must be a positive integer, got ${runs}`);
  }
  // Size of the RGB input handled per run; throughput is reported relative to it.
  const bytesPerRun = 786432*16;
  let slowest = 0;
  // Infinity instead of a finite sentinel, so arbitrarily slow runs are still tracked.
  let fastest = Infinity;
  let time = 0;
  for (let i = 0; i < runs; ++i) {
    const runtime = func();
    time += runtime;
    slowest = Math.max(slowest, runtime);
    fastest = Math.min(fastest, runtime);
  }
  let counted = runs;
  if (runs > 2) {
    // Discard the two outliers.
    time -= slowest;
    time -= fastest;
    counted -= 2;
  }
  const duration = time/counted;
  console.log(msg, duration, 'ms,', Math.round(bytesPerRun/duration/1000), 'MB/s');
}
// Benchmark every converter with the default number of runs, in the same order
// and with the same labels as before.
for (const [convert, label] of [
  [m1, 'original'],
  [m2, 'slightly optimized'],
  [m3, '4 byte blocks'],
  [m4, 'wasm simd'],
] as const) {
  measure(convert, label);
}
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tested with: node v24
build with:
tsc -p . && inwasm 'lib/*.wasm.js'

Results:
$> node lib/test.wasm.js
original 50.4 ms, 250 MB/s
slightly optimized 23.9 ms, 526 MB/s
4 byte blocks 8.1 ms, 1553 MB/s
wasm simd 2.65 ms, 4748 MB/s