Created
February 27, 2021 11:12
-
-
Save msiemens/51757c5713c4cd6081294bafd18e8a40 to your computer and use it in GitHub Desktop.
ISF Multi-Byte Encoding (buggy!) and Decoding in Rust
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! Multi-byte encoding/decoding
//!
//! Following the multi-byte encoding described in [\[MS-ISF\]] (sections _Multi-byte Encoding of
//! Signed Numbers_ and _Sizes of Tags and Numbers_).
//!
//! [\[MS-ISF\]]: https://docs.microsoft.com/en-us/uwp/specifications/ink-serialized-format
use itertools::Itertools; | |
pub(crate) fn decode(input: &[u8]) -> Vec<u64> { | |
let mut output = vec![]; | |
// Decode the multi-byte data length | |
let (length, offset) = decode_uint(&input); | |
// The length is actually a signed value so we need to remove the sign bit | |
// (see also `decode_signed`) | |
let length = length >> 1; | |
// Decode the remaining data | |
let mut index = offset; | |
for _ in 0..length { | |
let (value, offset) = decode_uint(&input[index..]); | |
output.push(value); | |
index += offset; | |
} | |
output | |
} | |
pub(crate) fn decode_signed(input: &[u8]) -> Vec<i64> { | |
decode(input) | |
.into_iter() | |
.map(|value| { | |
let shifted = (value >> 1) as i64; | |
if value & 0x1 == 0x1 { | |
-shifted | |
} else { | |
shifted | |
} | |
}) | |
.collect() | |
} | |
/// Decodes a single multi-byte encoded unsigned integer from the start of `data`.
///
/// Each byte contributes its low 7 bits, least-significant group first; a set
/// high bit (`0x80`) means another byte follows.
///
/// Returns the decoded value together with the number of bytes consumed. An
/// empty slice yields `(0, 0)`. If `data` ends while the continuation flag is
/// still set, the bits gathered so far are returned.
///
/// Bits that do not fit into a `u64` are discarded: an over-long sequence is
/// still consumed up to its terminating byte, but no longer triggers a shift
/// overflow.
fn decode_uint(data: &[u8]) -> (u64, usize) {
    let mut value: u64 = 0;
    let mut consumed = 0;
    let mut shift: u32 = 0;

    for &byte in data {
        let payload = u64::from(byte & 0x7F);
        // `checked_shl` returns `None` once the shift reaches 64 bits; such
        // groups lie entirely outside the u64 range and contribute nothing.
        value |= payload.checked_shl(shift).unwrap_or(0);
        shift = shift.saturating_add(7);
        consumed += 1;

        let has_more = byte & 0x80 != 0;
        if !has_more {
            break;
        }
    }

    (value, consumed)
}
pub(crate) fn encode_signed(input: &[i64]) -> Vec<u8> { | |
let unsigned = input | |
.into_iter() | |
.map(|value| { | |
let sign = value.signum(); | |
if sign < 0 { | |
(value.abs() << 1 & 1) as u64 | |
} else { | |
(value.abs() << 1) as u64 | |
} | |
}) | |
.collect_vec(); | |
encode(&*unsigned) | |
} | |
pub(crate) fn encode(input: &[u64]) -> Vec<u8> { | |
input | |
.into_iter() | |
.flat_map(|i| encode_uint(*i)) | |
.collect_vec() | |
} | |
/// Encodes a single unsigned integer using the multi-byte encoding.
///
/// The value is split into 7-bit groups, least-significant group first. Every
/// byte except the last has its high bit (`0x80`) set as a continuation flag.
/// Zero is encoded as the single byte `0x00`.
fn encode_uint(mut value: u64) -> Vec<u8> {
    // A u64 holds 64 bits, i.e. at most ceil(64 / 7) = 10 groups of 7 bits.
    let mut encoded = Vec::with_capacity(10);

    // Anything that does not fit into 7 bits needs a continuation byte. The
    // bound must include 128 itself, otherwise 128 would be written as a lone
    // `0x80`: a continuation flag with no payload and nothing after it.
    while value >= 0x80 {
        // Take the lowest 7-bit segment and mark it as "more bytes follow".
        encoded.push((value & 0x7F) as u8 | 0x80);
        // Continue with the next 7-bit segment.
        value >>= 7;
    }

    // The remainder is below 0x80, so its continuation flag is NOT set.
    encoded.push(value as u8);
    encoded
}
#[cfg(test)]
mod test {
    use crate::shared::multi_byte::{decode, decode_signed, encode, encode_signed};

    /// Unsigned values must survive an encode/decode round trip.
    #[test]
    fn test_encoding() {
        let values = [8742u64];
        let round_tripped = decode(&encode(&values));
        assert_eq!(round_tripped, values);
    }

    /// Signed values (positive, negative and zero) must survive an
    /// encode/decode round trip.
    #[test]
    fn test_encoding_signed() {
        // Marked as broken by the original author.
        let values = [8742i64, -4, 0];
        let round_tripped = decode_signed(&encode_signed(&values));
        assert_eq!(round_tripped, values);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment