Last active
November 26, 2024 01:06
-
-
Save tlrmchlsmth/8067f1b24a82b6e2f90450e7764fa103 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import msgpack | |
import pickle | |
import time | |
import json | |
from msgspec import msgpack as msgspec_msgpack | |
import struct | |
from io import BytesIO | |
def _array_from_message(message):
    """Rebuild an array from a decoded ``{'shape', 'dtype', 'data'}`` mapping."""
    flat = np.frombuffer(message['data'], dtype=np.dtype(message['dtype']))
    return flat.reshape(message['shape'])


def _pack_custom_binary(x):
    """Encode *x* as a 1-byte ndim, then the shape as unsigned ints, then raw data."""
    ndim = len(x.shape)
    # The ndim byte and the shape are packed with separate calls so that no
    # alignment padding is inserted between them; the decoder reads the shape
    # starting at byte offset 1.
    return struct.pack('B', ndim) + struct.pack(f'{ndim}I', *x.shape) + x.tobytes()


def benchmark_methods(arr, num_iterations=1000):
    """Time several ways of serializing and deserializing a NumPy array.

    Each method is warmed up once, then its serializer and its deserializer
    are each run ``num_iterations`` times and the mean wall-clock time per
    call is recorded.

    Args:
        arr: The NumPy array to round-trip.
        num_iterations: Number of timed repetitions per phase; must be >= 1.

    Returns:
        A dict mapping method name to a dict with keys ``'serialize_time'``
        and ``'deserialize_time'`` (mean seconds per call), ``'total_time'``
        (their sum), ``'size'`` (length of the serialized payload) and
        ``'correct'`` (whether the round-tripped array equals ``arr``).

    Raises:
        ValueError: If ``num_iterations`` is less than 1.
    """
    # Without at least one timed iteration the averages are undefined and
    # ``serialized`` would still hold a payload left over from the warm-up.
    if num_iterations < 1:
        raise ValueError(f"num_iterations must be >= 1, got {num_iterations}")

    methods = {
        'numpy.save': {
            'serialize': lambda x: numpy_save(x),
            'deserialize': lambda x: numpy_load(x),
        },
        'pickle': {
            'serialize': lambda x: pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL),
            'deserialize': lambda x: pickle.loads(x),
        },
        'msgpack_raw': {
            # Only the raw bytes are sent; dtype and shape come from ``arr``.
            'serialize': lambda x: msgpack.packb(x.tobytes()),
            'deserialize': lambda x: np.frombuffer(
                msgpack.unpackb(x), dtype=arr.dtype
            ).reshape(arr.shape),
        },
        'msgpack_with_shape': {
            'serialize': lambda x: msgpack.packb(
                {'shape': x.shape, 'dtype': str(x.dtype), 'data': x.tobytes()}
            ),
            # Decode the payload once; decoding it separately for each field
            # would triple the measured deserialization cost.
            'deserialize': lambda x: _array_from_message(msgpack.unpackb(x)),
        },
        'msgspec': {
            'serialize': lambda x: msgspec_msgpack.encode(
                {'shape': x.shape, 'dtype': str(x.dtype), 'data': x.tobytes()}
            ),
            # Decoded once, for the same reason as 'msgpack_with_shape'.
            'deserialize': lambda x: _array_from_message(msgspec_msgpack.decode(x)),
        },
        'custom_binary': {
            'serialize': _pack_custom_binary,
            'deserialize': lambda x: deserialize_custom_binary(x, arr.dtype),
        },
    }

    # Warm-up run
    for method in methods.values():
        method['deserialize'](method['serialize'](arr))

    # Benchmark each method
    results = {}
    for name, method in methods.items():
        serialize = method['serialize']
        deserialize = method['deserialize']

        # Serialization timing
        start_time = time.perf_counter()
        for _ in range(num_iterations):
            serialized = serialize(arr)
        serialize_time = (time.perf_counter() - start_time) / num_iterations

        # Deserialization timing
        start_time = time.perf_counter()
        for _ in range(num_iterations):
            deserialize(serialized)
        deserialize_time = (time.perf_counter() - start_time) / num_iterations

        # Verify correctness
        final = deserialize(serialized)
        results[name] = {
            'serialize_time': serialize_time,
            'deserialize_time': deserialize_time,
            'total_time': serialize_time + deserialize_time,
            'size': len(serialized),
            'correct': np.array_equal(arr, final),
        }
    return results
def numpy_save(arr):
    """Return the bytes that ``np.save`` writes for *arr*."""
    with BytesIO() as buffer:
        np.save(buffer, arr)
        # Grab the contents before the buffer is closed on leaving the block.
        return buffer.getvalue()
def numpy_load(data):
    """Load an array with ``np.load`` from the in-memory bytes *data*."""
    return np.load(BytesIO(data))
def deserialize_custom_binary(data, dtype):
    """Decode an array from the custom binary layout.

    The layout is: one unsigned byte holding the number of dimensions, then
    that many unsigned ints (``struct`` format ``'I'``) holding the shape,
    then the raw array bytes.

    Args:
        data: Bytes-like object holding the encoded array.
        dtype: NumPy dtype used to interpret the raw array bytes.

    Returns:
        An array of ``dtype`` with the decoded shape. It is a view onto
        ``data`` rather than a copy.

    Raises:
        struct.error: If ``data`` is too short to hold the header.
    """
    # First byte is number of dimensions
    (ndim,) = struct.unpack_from('B', data, 0)
    # Read shape (starts after ndim byte)
    shape = struct.unpack_from(f'{ndim}I', data, 1)
    payload_offset = 1 + ndim * struct.calcsize('I')
    # Read the payload in place via ``offset`` instead of slicing, which
    # would copy every array byte first. The odd header length means the
    # resulting view is not aligned for multi-byte dtypes.
    return np.frombuffer(data, dtype=dtype, offset=payload_offset).reshape(shape)
def print_results(results):
    """Print one row per method from *results*, ordered by ascending total time.

    Times are stored in seconds and shown in microseconds.
    """
    header = (
        f"{'Method':<20} {'Serialize (μs)':<15} {'Deserialize (μs)':<15} "
        f"{'Total (μs)':<15} {'Size (bytes)':<15} {'Correct':<8}"
    )
    print("\nResults:")
    print(header)
    print("-" * 90)

    # Sort by total time
    ranked = sorted(results, key=lambda name: results[name]['total_time'])
    for name in ranked:
        stats = results[name]
        cells = [
            format(name, '<20'),
            format(stats['serialize_time'] * 1e6, '>8.2f'),
            format(stats['deserialize_time'] * 1e6, '>8.2f'),
            format(stats['total_time'] * 1e6, '>8.2f'),
            format(stats['size'], '>8,d'),
            format(str(stats['correct']), '>5'),
        ]
        print(" ".join(cells))
def run_benchmarks():
    """Benchmark every method on random int32 and float32 arrays of two sizes.

    For each test array, prints its name, shape and dtype, then the results
    table for that array.
    """
    small_shape = (100, 100)
    medium_shape = (1000, 1000)

    # Test with different array sizes and types
    test_arrays = [
        ('small_int', np.random.randint(0, 100, small_shape, dtype=np.int32)),
        ('medium_int', np.random.randint(0, 100, medium_shape, dtype=np.int32)),
        ('small_float', np.random.random(small_shape).astype(np.float32)),
        ('medium_float', np.random.random(medium_shape).astype(np.float32)),
    ]

    for label, array in test_arrays:
        print(f"\nBenchmarking {label} array:")
        print(f"Shape: {array.shape}, dtype: {array.dtype}")
        print_results(benchmark_methods(array))
# Run the benchmarks only when this file is executed as a script.
if __name__ == "__main__":
    run_benchmarks()
Author
tlrmchlsmth
commented
Nov 25, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment