Skip to content

Instantly share code, notes, and snippets.

@kif
Last active October 22, 2021 09:37
Show Gist options
  • Save kif/dfb190cf8fe7179b7b773bde39195391 to your computer and use it in GitHub Desktop.
Save kif/dfb190cf8fe7179b7b773bde39195391 to your computer and use it in GitHub Desktop.
Some insights about chunking & compression for MCA data in BLISS
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "ultimate-indonesian",
"metadata": {},
"source": [
"# Chuncking of dataset containing 1D data\n",
"\n",
"Case study for the storage of MCA and other ROI-collection in Bliss. Inspired by the ROI-collection of ESRF-ID22\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "indirect-mounting",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib nbagg"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "prescription-animal",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"h5py v3.2.1. HDF5 v1.12.0. hdf5plugin v3.2.0\n",
"HDF5PluginBuildOptions(openmp=True, native=True, sse2=True, avx2=True, cpp11=True, filter_file_extension='.so')\n",
"{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}\n"
]
}
],
"source": [
"import tempfile\n",
"import os\n",
"import time\n",
"from collections import namedtuple\n",
"import numpy\n",
"import h5py\n",
"import hdf5plugin\n",
"from matplotlib.pyplot import subplots\n",
"\n",
"print(f\"h5py v{h5py.version.version}. HDF5 v{h5py.version.hdf5_version}. hdf5plugin v{hdf5plugin.version}\")\n",
"print(hdf5plugin.config)\n",
"print(os.sched_getaffinity(0))\n",
"\n",
"Key = namedtuple(\"Key\", \"format compression chunk\")\n",
"Res = namedtuple(\"Res\", \"compression read write\")\n",
"\n",
"M = 1<<20\n",
"\n",
"fname = \"/mnt/data/ID22/MultiAnalyzer/LaB6_35keV_mantr10_nozzle20_1/LaB6_35keV_mantr10_nozzle20_1_0001/LaB6_35keV_mantr10_nozzle20_1_0001.h5\"\n",
"hpath = \"/1.1/measurement/eiger_roi_collection\""
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "antique-spouse",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Architecture : x86_64\r\n",
"Mode(s) opératoire(s) des processeurs : 32-bit, 64-bit\r\n",
"Boutisme : Little Endian\r\n",
"Tailles des adresses: 43 bits physical, 48 bits virtual\r\n",
"Processeur(s) : 16\r\n",
"Liste de processeur(s) en ligne : 0-15\r\n",
"Thread(s) par cœur : 2\r\n",
"Cœur(s) par socket : 8\r\n",
"Socket(s) : 1\r\n",
"Nœud(s) NUMA : 1\r\n",
"Identifiant constructeur : AuthenticAMD\r\n",
"Famille de processeur : 23\r\n",
"Modèle : 49\r\n",
"Nom de modèle : AMD EPYC 7262 8-Core Processor\r\n",
"Révision : 0\r\n",
"Accroissement de fréquence : activé\r\n",
"Vitesse du processeur en MHz : 2498.267\r\n",
"Vitesse maximale du processeur en MHz : 3400,0000\r\n",
"Vitesse minimale du processeur en MHz : 1500,0000\r\n",
"BogoMIPS : 6400.27\r\n",
"Virtualisation : AMD-V\r\n",
"Cache L1d : 256 KiB\r\n",
"Cache L1i : 256 KiB\r\n",
"Cache L2 : 4 MiB\r\n",
"Cache L3 : 128 MiB\r\n",
"Nœud NUMA 0 de processeur(s) : 0-15\r\n"
]
}
],
"source": [
"!lscpu |head -n 26"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "corporate-resource",
"metadata": {},
"outputs": [],
"source": [
"t0 = time.perf_counter()\n",
"with h5py.File(fname) as h:\n",
" ds = h[hpath][()]\n",
"dt = time.perf_counter()-t0"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "conscious-ladder",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset of shape (390000, 6656) with dtype: int32, total size of 9902.344 MB\n",
"Read time (from NFS): 34.774s, Read speed: 284.765 MB/s\n",
"This is a very compressible dataset with a maximum value of 260\n"
]
},
{
"data": {
"application/javascript": [
"/* Put everything inside the global mpl namespace */\n",
"/* global mpl */\n",
"window.mpl = {};\n",
"\n",
"mpl.get_websocket_type = function () {\n",
" if (typeof WebSocket !== 'undefined') {\n",
" return WebSocket;\n",
" } else if (typeof MozWebSocket !== 'undefined') {\n",
" return MozWebSocket;\n",
" } else {\n",
" alert(\n",
" 'Your browser does not have WebSocket support. ' +\n",
" 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n",
" 'Firefox 4 and 5 are also supported but you ' +\n",
" 'have to enable WebSockets in about:config.'\n",
" );\n",
" }\n",
"};\n",
"\n",
"mpl.figure = function (figure_id, websocket, ondownload, parent_element) {\n",
" this.id = figure_id;\n",
"\n",
" this.ws = websocket;\n",
"\n",
" this.supports_binary = this.ws.binaryType !== undefined;\n",
"\n",
" if (!this.supports_binary) {\n",
" var warnings = document.getElementById('mpl-warnings');\n",
" if (warnings) {\n",
" warnings.style.display = 'block';\n",
" warnings.textContent =\n",
" 'This browser does not support binary websocket messages. ' +\n",
" 'Performance may be slow.';\n",
" }\n",
" }\n",
"\n",
" this.imageObj = new Image();\n",
"\n",
" this.context = undefined;\n",
" this.message = undefined;\n",
" this.canvas = undefined;\n",
" this.rubberband_canvas = undefined;\n",
" this.rubberband_context = undefined;\n",
" this.format_dropdown = undefined;\n",
"\n",
" this.image_mode = 'full';\n",
"\n",
" this.root = document.createElement('div');\n",
" this.root.setAttribute('style', 'display: inline-block');\n",
" this._root_extra_style(this.root);\n",
"\n",
" parent_element.appendChild(this.root);\n",
"\n",
" this._init_header(this);\n",
" this._init_canvas(this);\n",
" this._init_toolbar(this);\n",
"\n",
" var fig = this;\n",
"\n",
" this.waiting = false;\n",
"\n",
" this.ws.onopen = function () {\n",
" fig.send_message('supports_binary', { value: fig.supports_binary });\n",
" fig.send_message('send_image_mode', {});\n",
" if (fig.ratio !== 1) {\n",
" fig.send_message('set_dpi_ratio', { dpi_ratio: fig.ratio });\n",
" }\n",
" fig.send_message('refresh', {});\n",
" };\n",
"\n",
" this.imageObj.onload = function () {\n",
" if (fig.image_mode === 'full') {\n",
" // Full images could contain transparency (where diff images\n",
" // almost always do), so we need to clear the canvas so that\n",
" // there is no ghosting.\n",
" fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n",
" }\n",
" fig.context.drawImage(fig.imageObj, 0, 0);\n",
" };\n",
"\n",
" this.imageObj.onunload = function () {\n",
" fig.ws.close();\n",
" };\n",
"\n",
" this.ws.onmessage = this._make_on_message_function(this);\n",
"\n",
" this.ondownload = ondownload;\n",
"};\n",
"\n",
"mpl.figure.prototype._init_header = function () {\n",
" var titlebar = document.createElement('div');\n",
" titlebar.classList =\n",
" 'ui-dialog-titlebar ui-widget-header ui-corner-all ui-helper-clearfix';\n",
" var titletext = document.createElement('div');\n",
" titletext.classList = 'ui-dialog-title';\n",
" titletext.setAttribute(\n",
" 'style',\n",
" 'width: 100%; text-align: center; padding: 3px;'\n",
" );\n",
" titlebar.appendChild(titletext);\n",
" this.root.appendChild(titlebar);\n",
" this.header = titletext;\n",
"};\n",
"\n",
"mpl.figure.prototype._canvas_extra_style = function (_canvas_div) {};\n",
"\n",
"mpl.figure.prototype._root_extra_style = function (_canvas_div) {};\n",
"\n",
"mpl.figure.prototype._init_canvas = function () {\n",
" var fig = this;\n",
"\n",
" var canvas_div = (this.canvas_div = document.createElement('div'));\n",
" canvas_div.setAttribute(\n",
" 'style',\n",
" 'border: 1px solid #ddd;' +\n",
" 'box-sizing: content-box;' +\n",
" 'clear: both;' +\n",
" 'min-height: 1px;' +\n",
" 'min-width: 1px;' +\n",
" 'outline: 0;' +\n",
" 'overflow: hidden;' +\n",
" 'position: relative;' +\n",
" 'resize: both;'\n",
" );\n",
"\n",
" function on_keyboard_event_closure(name) {\n",
" return function (event) {\n",
" return fig.key_event(event, name);\n",
" };\n",
" }\n",
"\n",
" canvas_div.addEventListener(\n",
" 'keydown',\n",
" on_keyboard_event_closure('key_press')\n",
" );\n",
" canvas_div.addEventListener(\n",
" 'keyup',\n",
" on_keyboard_event_closure('key_release')\n",
" );\n",
"\n",
" this._canvas_extra_style(canvas_div);\n",
" this.root.appendChild(canvas_div);\n",
"\n",
" var canvas = (this.canvas = document.createElement('canvas'));\n",
" canvas.classList.add('mpl-canvas');\n",
" canvas.setAttribute('style', 'box-sizing: content-box;');\n",
"\n",
" this.context = canvas.getContext('2d');\n",
"\n",
" var backingStore =\n",
" this.context.backingStorePixelRatio ||\n",
" this.context.webkitBackingStorePixelRatio ||\n",
" this.context.mozBackingStorePixelRatio ||\n",
" this.context.msBackingStorePixelRatio ||\n",
" this.context.oBackingStorePixelRatio ||\n",
" this.context.backingStorePixelRatio ||\n",
" 1;\n",
"\n",
" this.ratio = (window.devicePixelRatio || 1) / backingStore;\n",
"\n",
" var rubberband_canvas = (this.rubberband_canvas = document.createElement(\n",
" 'canvas'\n",
" ));\n",
" rubberband_canvas.setAttribute(\n",
" 'style',\n",
" 'box-sizing: content-box; position: absolute; left: 0; top: 0; z-index: 1;'\n",
" );\n",
"\n",
" // Apply a ponyfill if ResizeObserver is not implemented by browser.\n",
" if (this.ResizeObserver === undefined) {\n",
" if (window.ResizeObserver !== undefined) {\n",
" this.ResizeObserver = window.ResizeObserver;\n",
" } else {\n",
" var obs = _JSXTOOLS_RESIZE_OBSERVER({});\n",
" this.ResizeObserver = obs.ResizeObserver;\n",
" }\n",
" }\n",
"\n",
" this.resizeObserverInstance = new this.ResizeObserver(function (entries) {\n",
" var nentries = entries.length;\n",
" for (var i = 0; i < nentries; i++) {\n",
" var entry = entries[i];\n",
" var width, height;\n",
" if (entry.contentBoxSize) {\n",
" if (entry.contentBoxSize instanceof Array) {\n",
" // Chrome 84 implements new version of spec.\n",
" width = entry.contentBoxSize[0].inlineSize;\n",
" height = entry.contentBoxSize[0].blockSize;\n",
" } else {\n",
" // Firefox implements old version of spec.\n",
" width = entry.contentBoxSize.inlineSize;\n",
" height = entry.contentBoxSize.blockSize;\n",
" }\n",
" } else {\n",
" // Chrome <84 implements even older version of spec.\n",
" width = entry.contentRect.width;\n",
" height = entry.contentRect.height;\n",
" }\n",
"\n",
" // Keep the size of the canvas and rubber band canvas in sync with\n",
" // the canvas container.\n",
" if (entry.devicePixelContentBoxSize) {\n",
" // Chrome 84 implements new version of spec.\n",
" canvas.setAttribute(\n",
" 'width',\n",
" entry.devicePixelContentBoxSize[0].inlineSize\n",
" );\n",
" canvas.setAttribute(\n",
" 'height',\n",
" entry.devicePixelContentBoxSize[0].blockSize\n",
" );\n",
" } else {\n",
" canvas.setAttribute('width', width * fig.ratio);\n",
" canvas.setAttribute('height', height * fig.ratio);\n",
" }\n",
" canvas.setAttribute(\n",
" 'style',\n",
" 'width: ' + width + 'px; height: ' + height + 'px;'\n",
" );\n",
"\n",
" rubberband_canvas.setAttribute('width', width);\n",
" rubberband_canvas.setAttribute('height', height);\n",
"\n",
" // And update the size in Python. We ignore the initial 0/0 size\n",
" // that occurs as the element is placed into the DOM, which should\n",
" // otherwise not happen due to the minimum size styling.\n",
" if (fig.ws.readyState == 1 && width != 0 && height != 0) {\n",
" fig.request_resize(width, height);\n",
" }\n",
" }\n",
" });\n",
" this.resizeObserverInstance.observe(canvas_div);\n",
"\n",
" function on_mouse_event_closure(name) {\n",
" return function (event) {\n",
" return fig.mouse_event(event, name);\n",
" };\n",
" }\n",
"\n",
" rubberband_canvas.addEventListener(\n",
" 'mousedown',\n",
" on_mouse_event_closure('button_press')\n",
" );\n",
" rubberband_canvas.addEventListener(\n",
" 'mouseup',\n",
" on_mouse_event_closure('button_release')\n",
" );\n",
" rubberband_canvas.addEventListener(\n",
" 'dblclick',\n",
" on_mouse_event_closure('dblclick')\n",
" );\n",
" // Throttle sequential mouse events to 1 every 20ms.\n",
" rubberband_canvas.addEventListener(\n",
" 'mousemove',\n",
" on_mouse_event_closure('motion_notify')\n",
" );\n",
"\n",
" rubberband_canvas.addEventListener(\n",
" 'mouseenter',\n",
" on_mouse_event_closure('figure_enter')\n",
" );\n",
" rubberband_canvas.addEventListener(\n",
" 'mouseleave',\n",
" on_mouse_event_closure('figure_leave')\n",
" );\n",
"\n",
" canvas_div.addEventListener('wheel', function (event) {\n",
" if (event.deltaY < 0) {\n",
" event.step = 1;\n",
" } else {\n",
" event.step = -1;\n",
" }\n",
" on_mouse_event_closure('scroll')(event);\n",
" });\n",
"\n",
" canvas_div.appendChild(canvas);\n",
" canvas_div.appendChild(rubberband_canvas);\n",
"\n",
" this.rubberband_context = rubberband_canvas.getContext('2d');\n",
" this.rubberband_context.strokeStyle = '#000000';\n",
"\n",
" this._resize_canvas = function (width, height, forward) {\n",
" if (forward) {\n",
" canvas_div.style.width = width + 'px';\n",
" canvas_div.style.height = height + 'px';\n",
" }\n",
" };\n",
"\n",
" // Disable right mouse context menu.\n",
" this.rubberband_canvas.addEventListener('contextmenu', function (_e) {\n",
" event.preventDefault();\n",
" return false;\n",
" });\n",
"\n",
" function set_focus() {\n",
" canvas.focus();\n",
" canvas_div.focus();\n",
" }\n",
"\n",
" window.setTimeout(set_focus, 100);\n",
"};\n",
"\n",
"mpl.figure.prototype._init_toolbar = function () {\n",
" var fig = this;\n",
"\n",
" var toolbar = document.createElement('div');\n",
" toolbar.classList = 'mpl-toolbar';\n",
" this.root.appendChild(toolbar);\n",
"\n",
" function on_click_closure(name) {\n",
" return function (_event) {\n",
" return fig.toolbar_button_onclick(name);\n",
" };\n",
" }\n",
"\n",
" function on_mouseover_closure(tooltip) {\n",
" return function (event) {\n",
" if (!event.currentTarget.disabled) {\n",
" return fig.toolbar_button_onmouseover(tooltip);\n",
" }\n",
" };\n",
" }\n",
"\n",
" fig.buttons = {};\n",
" var buttonGroup = document.createElement('div');\n",
" buttonGroup.classList = 'mpl-button-group';\n",
" for (var toolbar_ind in mpl.toolbar_items) {\n",
" var name = mpl.toolbar_items[toolbar_ind][0];\n",
" var tooltip = mpl.toolbar_items[toolbar_ind][1];\n",
" var image = mpl.toolbar_items[toolbar_ind][2];\n",
" var method_name = mpl.toolbar_items[toolbar_ind][3];\n",
"\n",
" if (!name) {\n",
" /* Instead of a spacer, we start a new button group. */\n",
" if (buttonGroup.hasChildNodes()) {\n",
" toolbar.appendChild(buttonGroup);\n",
" }\n",
" buttonGroup = document.createElement('div');\n",
" buttonGroup.classList = 'mpl-button-group';\n",
" continue;\n",
" }\n",
"\n",
" var button = (fig.buttons[name] = document.createElement('button'));\n",
" button.classList = 'mpl-widget';\n",
" button.setAttribute('role', 'button');\n",
" button.setAttribute('aria-disabled', 'false');\n",
" button.addEventListener('click', on_click_closure(method_name));\n",
" button.addEventListener('mouseover', on_mouseover_closure(tooltip));\n",
"\n",
" var icon_img = document.createElement('img');\n",
" icon_img.src = '_images/' + image + '.png';\n",
" icon_img.srcset = '_images/' + image + '_large.png 2x';\n",
" icon_img.alt = tooltip;\n",
" button.appendChild(icon_img);\n",
"\n",
" buttonGroup.appendChild(button);\n",
" }\n",
"\n",
" if (buttonGroup.hasChildNodes()) {\n",
" toolbar.appendChild(buttonGroup);\n",
" }\n",
"\n",
" var fmt_picker = document.createElement('select');\n",
" fmt_picker.classList = 'mpl-widget';\n",
" toolbar.appendChild(fmt_picker);\n",
" this.format_dropdown = fmt_picker;\n",
"\n",
" for (var ind in mpl.extensions) {\n",
" var fmt = mpl.extensions[ind];\n",
" var option = document.createElement('option');\n",
" option.selected = fmt === mpl.default_extension;\n",
" option.innerHTML = fmt;\n",
" fmt_picker.appendChild(option);\n",
" }\n",
"\n",
" var status_bar = document.createElement('span');\n",
" status_bar.classList = 'mpl-message';\n",
" toolbar.appendChild(status_bar);\n",
" this.message = status_bar;\n",
"};\n",
"\n",
"mpl.figure.prototype.request_resize = function (x_pixels, y_pixels) {\n",
" // Request matplotlib to resize the figure. Matplotlib will then trigger a resize in the client,\n",
" // which will in turn request a refresh of the image.\n",
" this.send_message('resize', { width: x_pixels, height: y_pixels });\n",
"};\n",
"\n",
"mpl.figure.prototype.send_message = function (type, properties) {\n",
" properties['type'] = type;\n",
" properties['figure_id'] = this.id;\n",
" this.ws.send(JSON.stringify(properties));\n",
"};\n",
"\n",
"mpl.figure.prototype.send_draw_message = function () {\n",
" if (!this.waiting) {\n",
" this.waiting = true;\n",
" this.ws.send(JSON.stringify({ type: 'draw', figure_id: this.id }));\n",
" }\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_save = function (fig, _msg) {\n",
" var format_dropdown = fig.format_dropdown;\n",
" var format = format_dropdown.options[format_dropdown.selectedIndex].value;\n",
" fig.ondownload(fig, format);\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_resize = function (fig, msg) {\n",
" var size = msg['size'];\n",
" if (size[0] !== fig.canvas.width || size[1] !== fig.canvas.height) {\n",
" fig._resize_canvas(size[0], size[1], msg['forward']);\n",
" fig.send_message('refresh', {});\n",
" }\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_rubberband = function (fig, msg) {\n",
" var x0 = msg['x0'] / fig.ratio;\n",
" var y0 = (fig.canvas.height - msg['y0']) / fig.ratio;\n",
" var x1 = msg['x1'] / fig.ratio;\n",
" var y1 = (fig.canvas.height - msg['y1']) / fig.ratio;\n",
" x0 = Math.floor(x0) + 0.5;\n",
" y0 = Math.floor(y0) + 0.5;\n",
" x1 = Math.floor(x1) + 0.5;\n",
" y1 = Math.floor(y1) + 0.5;\n",
" var min_x = Math.min(x0, x1);\n",
" var min_y = Math.min(y0, y1);\n",
" var width = Math.abs(x1 - x0);\n",
" var height = Math.abs(y1 - y0);\n",
"\n",
" fig.rubberband_context.clearRect(\n",
" 0,\n",
" 0,\n",
" fig.canvas.width / fig.ratio,\n",
" fig.canvas.height / fig.ratio\n",
" );\n",
"\n",
" fig.rubberband_context.strokeRect(min_x, min_y, width, height);\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_figure_label = function (fig, msg) {\n",
" // Updates the figure title.\n",
" fig.header.textContent = msg['label'];\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_cursor = function (fig, msg) {\n",
" var cursor = msg['cursor'];\n",
" switch (cursor) {\n",
" case 0:\n",
" cursor = 'pointer';\n",
" break;\n",
" case 1:\n",
" cursor = 'default';\n",
" break;\n",
" case 2:\n",
" cursor = 'crosshair';\n",
" break;\n",
" case 3:\n",
" cursor = 'move';\n",
" break;\n",
" }\n",
" fig.rubberband_canvas.style.cursor = cursor;\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_message = function (fig, msg) {\n",
" fig.message.textContent = msg['message'];\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_draw = function (fig, _msg) {\n",
" // Request the server to send over a new figure.\n",
" fig.send_draw_message();\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_image_mode = function (fig, msg) {\n",
" fig.image_mode = msg['mode'];\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_history_buttons = function (fig, msg) {\n",
" for (var key in msg) {\n",
" if (!(key in fig.buttons)) {\n",
" continue;\n",
" }\n",
" fig.buttons[key].disabled = !msg[key];\n",
" fig.buttons[key].setAttribute('aria-disabled', !msg[key]);\n",
" }\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_navigate_mode = function (fig, msg) {\n",
" if (msg['mode'] === 'PAN') {\n",
" fig.buttons['Pan'].classList.add('active');\n",
" fig.buttons['Zoom'].classList.remove('active');\n",
" } else if (msg['mode'] === 'ZOOM') {\n",
" fig.buttons['Pan'].classList.remove('active');\n",
" fig.buttons['Zoom'].classList.add('active');\n",
" } else {\n",
" fig.buttons['Pan'].classList.remove('active');\n",
" fig.buttons['Zoom'].classList.remove('active');\n",
" }\n",
"};\n",
"\n",
"mpl.figure.prototype.updated_canvas_event = function () {\n",
" // Called whenever the canvas gets updated.\n",
" this.send_message('ack', {});\n",
"};\n",
"\n",
"// A function to construct a web socket function for onmessage handling.\n",
"// Called in the figure constructor.\n",
"mpl.figure.prototype._make_on_message_function = function (fig) {\n",
" return function socket_on_message(evt) {\n",
" if (evt.data instanceof Blob) {\n",
" var img = evt.data;\n",
" if (img.type !== 'image/png') {\n",
" /* FIXME: We get \"Resource interpreted as Image but\n",
" * transferred with MIME type text/plain:\" errors on\n",
" * Chrome. But how to set the MIME type? It doesn't seem\n",
" * to be part of the websocket stream */\n",
" img.type = 'image/png';\n",
" }\n",
"\n",
" /* Free the memory for the previous frames */\n",
" if (fig.imageObj.src) {\n",
" (window.URL || window.webkitURL).revokeObjectURL(\n",
" fig.imageObj.src\n",
" );\n",
" }\n",
"\n",
" fig.imageObj.src = (window.URL || window.webkitURL).createObjectURL(\n",
" img\n",
" );\n",
" fig.updated_canvas_event();\n",
" fig.waiting = false;\n",
" return;\n",
" } else if (\n",
" typeof evt.data === 'string' &&\n",
" evt.data.slice(0, 21) === 'data:image/png;base64'\n",
" ) {\n",
" fig.imageObj.src = evt.data;\n",
" fig.updated_canvas_event();\n",
" fig.waiting = false;\n",
" return;\n",
" }\n",
"\n",
" var msg = JSON.parse(evt.data);\n",
" var msg_type = msg['type'];\n",
"\n",
" // Call the \"handle_{type}\" callback, which takes\n",
" // the figure and JSON message as its only arguments.\n",
" try {\n",
" var callback = fig['handle_' + msg_type];\n",
" } catch (e) {\n",
" console.log(\n",
" \"No handler for the '\" + msg_type + \"' message type: \",\n",
" msg\n",
" );\n",
" return;\n",
" }\n",
"\n",
" if (callback) {\n",
" try {\n",
" // console.log(\"Handling '\" + msg_type + \"' message: \", msg);\n",
" callback(fig, msg);\n",
" } catch (e) {\n",
" console.log(\n",
" \"Exception inside the 'handler_\" + msg_type + \"' callback:\",\n",
" e,\n",
" e.stack,\n",
" msg\n",
" );\n",
" }\n",
" }\n",
" };\n",
"};\n",
"\n",
"// from http://stackoverflow.com/questions/1114465/getting-mouse-location-in-canvas\n",
"mpl.findpos = function (e) {\n",
" //this section is from http://www.quirksmode.org/js/events_properties.html\n",
" var targ;\n",
" if (!e) {\n",
" e = window.event;\n",
" }\n",
" if (e.target) {\n",
" targ = e.target;\n",
" } else if (e.srcElement) {\n",
" targ = e.srcElement;\n",
" }\n",
" if (targ.nodeType === 3) {\n",
" // defeat Safari bug\n",
" targ = targ.parentNode;\n",
" }\n",
"\n",
" // pageX,Y are the mouse positions relative to the document\n",
" var boundingRect = targ.getBoundingClientRect();\n",
" var x = e.pageX - (boundingRect.left + document.body.scrollLeft);\n",
" var y = e.pageY - (boundingRect.top + document.body.scrollTop);\n",
"\n",
" return { x: x, y: y };\n",
"};\n",
"\n",
"/*\n",
" * return a copy of an object with only non-object keys\n",
" * we need this to avoid circular references\n",
" * http://stackoverflow.com/a/24161582/3208463\n",
" */\n",
"function simpleKeys(original) {\n",
" return Object.keys(original).reduce(function (obj, key) {\n",
" if (typeof original[key] !== 'object') {\n",
" obj[key] = original[key];\n",
" }\n",
" return obj;\n",
" }, {});\n",
"}\n",
"\n",
"mpl.figure.prototype.mouse_event = function (event, name) {\n",
" var canvas_pos = mpl.findpos(event);\n",
"\n",
" if (name === 'button_press') {\n",
" this.canvas.focus();\n",
" this.canvas_div.focus();\n",
" }\n",
"\n",
" var x = canvas_pos.x * this.ratio;\n",
" var y = canvas_pos.y * this.ratio;\n",
"\n",
" this.send_message(name, {\n",
" x: x,\n",
" y: y,\n",
" button: event.button,\n",
" step: event.step,\n",
" guiEvent: simpleKeys(event),\n",
" });\n",
"\n",
" /* This prevents the web browser from automatically changing to\n",
" * the text insertion cursor when the button is pressed. We want\n",
" * to control all of the cursor setting manually through the\n",
" * 'cursor' event from matplotlib */\n",
" event.preventDefault();\n",
" return false;\n",
"};\n",
"\n",
"mpl.figure.prototype._key_event_extra = function (_event, _name) {\n",
" // Handle any extra behaviour associated with a key event\n",
"};\n",
"\n",
"mpl.figure.prototype.key_event = function (event, name) {\n",
" // Prevent repeat events\n",
" if (name === 'key_press') {\n",
" if (event.key === this._key) {\n",
" return;\n",
" } else {\n",
" this._key = event.key;\n",
" }\n",
" }\n",
" if (name === 'key_release') {\n",
" this._key = null;\n",
" }\n",
"\n",
" var value = '';\n",
" if (event.ctrlKey && event.key !== 'Control') {\n",
" value += 'ctrl+';\n",
" }\n",
" else if (event.altKey && event.key !== 'Alt') {\n",
" value += 'alt+';\n",
" }\n",
" else if (event.shiftKey && event.key !== 'Shift') {\n",
" value += 'shift+';\n",
" }\n",
"\n",
" value += 'k' + event.key;\n",
"\n",
" this._key_event_extra(event, name);\n",
"\n",
" this.send_message(name, { key: value, guiEvent: simpleKeys(event) });\n",
" return false;\n",
"};\n",
"\n",
"mpl.figure.prototype.toolbar_button_onclick = function (name) {\n",
" if (name === 'download') {\n",
" this.handle_save(this, null);\n",
" } else {\n",
" this.send_message('toolbar_button', { name: name });\n",
" }\n",
"};\n",
"\n",
"mpl.figure.prototype.toolbar_button_onmouseover = function (tooltip) {\n",
" this.message.textContent = tooltip;\n",
"};\n",
"\n",
"///////////////// REMAINING CONTENT GENERATED BY embed_js.py /////////////////\n",
"// prettier-ignore\n",
"var _JSXTOOLS_RESIZE_OBSERVER=function(A){var t,i=new WeakMap,n=new WeakMap,a=new WeakMap,r=new WeakMap,o=new Set;function s(e){if(!(this instanceof s))throw new TypeError(\"Constructor requires 'new' operator\");i.set(this,e)}function h(){throw new TypeError(\"Function is not a constructor\")}function c(e,t,i,n){e=0 in arguments?Number(arguments[0]):0,t=1 in arguments?Number(arguments[1]):0,i=2 in arguments?Number(arguments[2]):0,n=3 in arguments?Number(arguments[3]):0,this.right=(this.x=this.left=e)+(this.width=i),this.bottom=(this.y=this.top=t)+(this.height=n),Object.freeze(this)}function d(){t=requestAnimationFrame(d);var s=new WeakMap,p=new Set;o.forEach((function(t){r.get(t).forEach((function(i){var r=t instanceof window.SVGElement,o=a.get(t),d=r?0:parseFloat(o.paddingTop),f=r?0:parseFloat(o.paddingRight),l=r?0:parseFloat(o.paddingBottom),u=r?0:parseFloat(o.paddingLeft),g=r?0:parseFloat(o.borderTopWidth),m=r?0:parseFloat(o.borderRightWidth),w=r?0:parseFloat(o.borderBottomWidth),b=u+f,F=d+l,v=(r?0:parseFloat(o.borderLeftWidth))+m,W=g+w,y=r?0:t.offsetHeight-W-t.clientHeight,E=r?0:t.offsetWidth-v-t.clientWidth,R=b+v,z=F+W,M=r?t.width:parseFloat(o.width)-R-E,O=r?t.height:parseFloat(o.height)-z-y;if(n.has(t)){var k=n.get(t);if(k[0]===M&&k[1]===O)return}n.set(t,[M,O]);var S=Object.create(h.prototype);S.target=t,S.contentRect=new c(u,d,M,O),s.has(i)||(s.set(i,[]),p.add(i)),s.get(i).push(S)}))})),p.forEach((function(e){i.get(e).call(e,s.get(e),e)}))}return s.prototype.observe=function(i){if(i instanceof window.Element){r.has(i)||(r.set(i,new Set),o.add(i),a.set(i,window.getComputedStyle(i)));var n=r.get(i);n.has(this)||n.add(this),cancelAnimationFrame(t),t=requestAnimationFrame(d)}},s.prototype.unobserve=function(i){if(i instanceof window.Element&&r.has(i)){var n=r.get(i);n.has(this)&&(n.delete(this),n.size||(r.delete(i),o.delete(i))),n.size||r.delete(i),o.size||cancelAnimationFrame(t)}},A.DOMRectReadOnly=c,A.ResizeObserver=s,A.ResizeObserverEntry=h,A}; // eslint-disable-line\n",
"mpl.toolbar_items = [[\"Home\", \"Reset original view\", \"fa fa-home icon-home\", \"home\"], [\"Back\", \"Back to previous view\", \"fa fa-arrow-left icon-arrow-left\", \"back\"], [\"Forward\", \"Forward to next view\", \"fa fa-arrow-right icon-arrow-right\", \"forward\"], [\"\", \"\", \"\", \"\"], [\"Pan\", \"Left button pans, Right button zooms\\nx/y fixes axis, CTRL fixes aspect\", \"fa fa-arrows icon-move\", \"pan\"], [\"Zoom\", \"Zoom to rectangle\\nx/y fixes axis, CTRL fixes aspect\", \"fa fa-square-o icon-check-empty\", \"zoom\"], [\"\", \"\", \"\", \"\"], [\"Download\", \"Download plot\", \"fa fa-floppy-o icon-save\", \"download\"]];\n",
"\n",
"mpl.extensions = [\"eps\", \"jpeg\", \"pgf\", \"pdf\", \"png\", \"ps\", \"raw\", \"svg\", \"tif\"];\n",
"\n",
"mpl.default_extension = \"png\";/* global mpl */\n",
"\n",
"var comm_websocket_adapter = function (comm) {\n",
" // Create a \"websocket\"-like object which calls the given IPython comm\n",
" // object with the appropriate methods. Currently this is a non binary\n",
" // socket, so there is still some room for performance tuning.\n",
" var ws = {};\n",
"\n",
" ws.binaryType = comm.kernel.ws.binaryType;\n",
" ws.readyState = comm.kernel.ws.readyState;\n",
" function updateReadyState(_event) {\n",
" if (comm.kernel.ws) {\n",
" ws.readyState = comm.kernel.ws.readyState;\n",
" } else {\n",
" ws.readyState = 3; // Closed state.\n",
" }\n",
" }\n",
" comm.kernel.ws.addEventListener('open', updateReadyState);\n",
" comm.kernel.ws.addEventListener('close', updateReadyState);\n",
" comm.kernel.ws.addEventListener('error', updateReadyState);\n",
"\n",
" ws.close = function () {\n",
" comm.close();\n",
" };\n",
" ws.send = function (m) {\n",
" //console.log('sending', m);\n",
" comm.send(m);\n",
" };\n",
" // Register the callback with on_msg.\n",
" comm.on_msg(function (msg) {\n",
" //console.log('receiving', msg['content']['data'], msg);\n",
" var data = msg['content']['data'];\n",
" if (data['blob'] !== undefined) {\n",
" data = {\n",
" data: new Blob(msg['buffers'], { type: data['blob'] }),\n",
" };\n",
" }\n",
" // Pass the mpl event to the overridden (by mpl) onmessage function.\n",
" ws.onmessage(data);\n",
" });\n",
" return ws;\n",
"};\n",
"\n",
"mpl.mpl_figure_comm = function (comm, msg) {\n",
" // This is the function which gets called when the mpl process\n",
" // starts-up an IPython Comm through the \"matplotlib\" channel.\n",
"\n",
" var id = msg.content.data.id;\n",
" // Get hold of the div created by the display call when the Comm\n",
" // socket was opened in Python.\n",
" var element = document.getElementById(id);\n",
" var ws_proxy = comm_websocket_adapter(comm);\n",
"\n",
" function ondownload(figure, _format) {\n",
" window.open(figure.canvas.toDataURL());\n",
" }\n",
"\n",
" var fig = new mpl.figure(id, ws_proxy, ondownload, element);\n",
"\n",
" // Call onopen now - mpl needs it, as it is assuming we've passed it a real\n",
" // web socket which is closed, not our websocket->open comm proxy.\n",
" ws_proxy.onopen();\n",
"\n",
" fig.parent_element = element;\n",
" fig.cell_info = mpl.find_output_cell(\"<div id='\" + id + \"'></div>\");\n",
" if (!fig.cell_info) {\n",
" console.error('Failed to find cell for figure', id, fig);\n",
" return;\n",
" }\n",
" fig.cell_info[0].output_area.element.on(\n",
" 'cleared',\n",
" { fig: fig },\n",
" fig._remove_fig_handler\n",
" );\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_close = function (fig, msg) {\n",
" var width = fig.canvas.width / fig.ratio;\n",
" fig.cell_info[0].output_area.element.off(\n",
" 'cleared',\n",
" fig._remove_fig_handler\n",
" );\n",
" fig.resizeObserverInstance.unobserve(fig.canvas_div);\n",
"\n",
" // Update the output cell to use the data from the current canvas.\n",
" fig.push_to_output();\n",
" var dataURL = fig.canvas.toDataURL();\n",
" // Re-enable the keyboard manager in IPython - without this line, in FF,\n",
" // the notebook keyboard shortcuts fail.\n",
" IPython.keyboard_manager.enable();\n",
" fig.parent_element.innerHTML =\n",
" '<img src=\"' + dataURL + '\" width=\"' + width + '\">';\n",
" fig.close_ws(fig, msg);\n",
"};\n",
"\n",
"mpl.figure.prototype.close_ws = function (fig, msg) {\n",
" fig.send_message('closing', msg);\n",
" // fig.ws.close()\n",
"};\n",
"\n",
"mpl.figure.prototype.push_to_output = function (_remove_interactive) {\n",
" // Turn the data on the canvas into data in the output cell.\n",
" var width = this.canvas.width / this.ratio;\n",
" var dataURL = this.canvas.toDataURL();\n",
" this.cell_info[1]['text/html'] =\n",
" '<img src=\"' + dataURL + '\" width=\"' + width + '\">';\n",
"};\n",
"\n",
"mpl.figure.prototype.updated_canvas_event = function () {\n",
" // Tell IPython that the notebook contents must change.\n",
" IPython.notebook.set_dirty(true);\n",
" this.send_message('ack', {});\n",
" var fig = this;\n",
" // Wait a second, then push the new image to the DOM so\n",
" // that it is saved nicely (might be nice to debounce this).\n",
" setTimeout(function () {\n",
" fig.push_to_output();\n",
" }, 1000);\n",
"};\n",
"\n",
"mpl.figure.prototype._init_toolbar = function () {\n",
" var fig = this;\n",
"\n",
" var toolbar = document.createElement('div');\n",
" toolbar.classList = 'btn-toolbar';\n",
" this.root.appendChild(toolbar);\n",
"\n",
" function on_click_closure(name) {\n",
" return function (_event) {\n",
" return fig.toolbar_button_onclick(name);\n",
" };\n",
" }\n",
"\n",
" function on_mouseover_closure(tooltip) {\n",
" return function (event) {\n",
" if (!event.currentTarget.disabled) {\n",
" return fig.toolbar_button_onmouseover(tooltip);\n",
" }\n",
" };\n",
" }\n",
"\n",
" fig.buttons = {};\n",
" var buttonGroup = document.createElement('div');\n",
" buttonGroup.classList = 'btn-group';\n",
" var button;\n",
" for (var toolbar_ind in mpl.toolbar_items) {\n",
" var name = mpl.toolbar_items[toolbar_ind][0];\n",
" var tooltip = mpl.toolbar_items[toolbar_ind][1];\n",
" var image = mpl.toolbar_items[toolbar_ind][2];\n",
" var method_name = mpl.toolbar_items[toolbar_ind][3];\n",
"\n",
" if (!name) {\n",
" /* Instead of a spacer, we start a new button group. */\n",
" if (buttonGroup.hasChildNodes()) {\n",
" toolbar.appendChild(buttonGroup);\n",
" }\n",
" buttonGroup = document.createElement('div');\n",
" buttonGroup.classList = 'btn-group';\n",
" continue;\n",
" }\n",
"\n",
" button = fig.buttons[name] = document.createElement('button');\n",
" button.classList = 'btn btn-default';\n",
" button.href = '#';\n",
" button.title = name;\n",
" button.innerHTML = '<i class=\"fa ' + image + ' fa-lg\"></i>';\n",
" button.addEventListener('click', on_click_closure(method_name));\n",
" button.addEventListener('mouseover', on_mouseover_closure(tooltip));\n",
" buttonGroup.appendChild(button);\n",
" }\n",
"\n",
" if (buttonGroup.hasChildNodes()) {\n",
" toolbar.appendChild(buttonGroup);\n",
" }\n",
"\n",
" // Add the status bar.\n",
" var status_bar = document.createElement('span');\n",
" status_bar.classList = 'mpl-message pull-right';\n",
" toolbar.appendChild(status_bar);\n",
" this.message = status_bar;\n",
"\n",
" // Add the close button to the window.\n",
" var buttongrp = document.createElement('div');\n",
" buttongrp.classList = 'btn-group inline pull-right';\n",
" button = document.createElement('button');\n",
" button.classList = 'btn btn-mini btn-primary';\n",
" button.href = '#';\n",
" button.title = 'Stop Interaction';\n",
" button.innerHTML = '<i class=\"fa fa-power-off icon-remove icon-large\"></i>';\n",
" button.addEventListener('click', function (_evt) {\n",
" fig.handle_close(fig, {});\n",
" });\n",
" button.addEventListener(\n",
" 'mouseover',\n",
" on_mouseover_closure('Stop Interaction')\n",
" );\n",
" buttongrp.appendChild(button);\n",
" var titlebar = this.root.querySelector('.ui-dialog-titlebar');\n",
" titlebar.insertBefore(buttongrp, titlebar.firstChild);\n",
"};\n",
"\n",
"mpl.figure.prototype._remove_fig_handler = function (event) {\n",
" var fig = event.data.fig;\n",
" if (event.target !== this) {\n",
" // Ignore bubbled events from children.\n",
" return;\n",
" }\n",
" fig.close_ws(fig, {});\n",
"};\n",
"\n",
"mpl.figure.prototype._root_extra_style = function (el) {\n",
" el.style.boxSizing = 'content-box'; // override notebook setting of border-box.\n",
"};\n",
"\n",
"mpl.figure.prototype._canvas_extra_style = function (el) {\n",
" // this is important to make the div 'focusable\n",
" el.setAttribute('tabindex', 0);\n",
" // reach out to IPython and tell the keyboard manager to turn it's self\n",
" // off when our div gets focus\n",
"\n",
" // location in version 3\n",
" if (IPython.notebook.keyboard_manager) {\n",
" IPython.notebook.keyboard_manager.register_events(el);\n",
" } else {\n",
" // location in version 2\n",
" IPython.keyboard_manager.register_events(el);\n",
" }\n",
"};\n",
"\n",
"mpl.figure.prototype._key_event_extra = function (event, _name) {\n",
" var manager = IPython.notebook.keyboard_manager;\n",
" if (!manager) {\n",
" manager = IPython.keyboard_manager;\n",
" }\n",
"\n",
" // Check for shift+enter\n",
" if (event.shiftKey && event.which === 13) {\n",
" this.canvas_div.blur();\n",
" // select the cell after this one\n",
" var index = IPython.notebook.find_cell_index(this.cell_info[0]);\n",
" IPython.notebook.select(index + 1);\n",
" }\n",
"};\n",
"\n",
"mpl.figure.prototype.handle_save = function (fig, _msg) {\n",
" fig.ondownload(fig, null);\n",
"};\n",
"\n",
"mpl.find_output_cell = function (html_output) {\n",
" // Return the cell and output element which can be found *uniquely* in the notebook.\n",
" // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n",
" // IPython event is triggered only after the cells have been serialised, which for\n",
" // our purposes (turning an active figure into a static one), is too late.\n",
" var cells = IPython.notebook.get_cells();\n",
" var ncells = cells.length;\n",
" for (var i = 0; i < ncells; i++) {\n",
" var cell = cells[i];\n",
" if (cell.cell_type === 'code') {\n",
" for (var j = 0; j < cell.output_area.outputs.length; j++) {\n",
" var data = cell.output_area.outputs[j];\n",
" if (data.data) {\n",
" // IPython >= 3 moved mimebundle to data attribute of output\n",
" data = data.data;\n",
" }\n",
" if (data['text/html'] === html_output) {\n",
" return [cell, data, j];\n",
" }\n",
" }\n",
" }\n",
" }\n",
"};\n",
"\n",
"// Register the function which deals with the matplotlib target/channel.\n",
"// The kernel may be null if the page has been refreshed.\n",
"if (IPython.notebook.kernel !== null) {\n",
" IPython.notebook.kernel.comm_manager.register_target(\n",
" 'matplotlib',\n",
" mpl.mpl_figure_comm\n",
" );\n",
"}\n"
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<img src=\"\" width=\"639.85\">"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"Text(0.5, 0, 'ROI value')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(f\"Dataset of shape {ds.shape} with dtype: {ds.dtype}, total size of {ds.nbytes/M:.3f} MB\")\n",
"print(f\"Read time (from NFS): {dt:.3f}s, Read speed: {ds.nbytes/M/dt:.3f} MB/s\")\n",
"m = ds.max()\n",
"print(\"This is a very compressible dataset with a maximum value of\", m)\n",
"fig, ax = subplots()\n",
"ax.hist(ds.ravel(), m+1)\n",
"ax.set_title(\"Histogram of ROI values\")\n",
"ax.set_yscale(\"log\")\n",
"ax.set_ylabel(\"Occurences\")\n",
"ax.set_xlabel(\"ROI value\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "formed-ladder",
"metadata": {},
"outputs": [],
"source": [
"def bench(key):\n",
" res = None\n",
" os.system(\"sync\")\n",
" with tempfile.TemporaryDirectory() as d:\n",
" if key.format == \"numpy\":\n",
" fname = os.path.join(d, \"numpy.npy\")\n",
" t0 = time.perf_counter()\n",
" numpy.save(fname, ds)\n",
" t1 = time.perf_counter()\n",
" ds1 = numpy.load(fname)\n",
" t2 = time.perf_counter()\n",
" elif key.format == \"hdf5\":\n",
" fname = os.path.join(d, \"file.h5\")\n",
" if key.compression == \"bitshuffle\":\n",
" cmp = hdf5plugin.Bitshuffle()\n",
" else:\n",
" cmp = {\"compression\":key.compression}\n",
" t0 = time.perf_counter()\n",
" with h5py.File(fname, \"w\") as h:\n",
" h.create_dataset(\"data\", data=ds, chunks=key.chunk, **cmp)\n",
" t1 = time.perf_counter()\n",
" with h5py.File(fname, \"r\") as h:\n",
" ds1 = h[\"data\"][()]\n",
" t2 = time.perf_counter()\n",
" assert abs(ds1-ds).max() == 0\n",
" res = Res(ds.nbytes/os.stat(fname).st_size, ds.nbytes/M/(t2-t1), ds.nbytes/M/(t1-t0))\n",
" return res\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "canadian-battlefield",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{Key(format='numpy', compression=None, chunk=None): Res(compression=0.999999987672584, read=4557.3690840703375, write=1591.1733778477628)}\n"
]
}
],
"source": [
"results = {}\n",
"key = Key(\"numpy\", None, None)\n",
"results[key] = bench(key)\n",
"print(results)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "commercial-publisher",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Key(format='hdf5', compression=None, chunk=None) Res(compression=0.9999998027613801, read=4740.710243687177, write=1590.85635712532)\n",
"Key(format='hdf5', compression=None, chunk=(1, 1664)) Res(compression=0.9930296992060388, read=1217.2936255579752, write=625.5655888179406)\n",
"Key(format='hdf5', compression=None, chunk=(1, 6656)) Res(compression=0.9982481664538824, read=2576.1340075650933, write=987.3396274666414)\n",
"Key(format='hdf5', compression=None, chunk=(40, 6656)) Res(compression=0.9999557773502066, read=4584.855723496288, write=1400.5383090330129)\n",
"Key(format='hdf5', compression=None, chunk=(160, 1664)) Res(compression=0.9997507093506732, read=3825.364627557245, write=1374.6310640228137)\n"
]
}
],
"source": [
"for k in [(\"hdf5\", None, None),\n",
" (\"hdf5\", None, (1,1664)),\n",
" (\"hdf5\", None, (1,6656)),\n",
" (\"hdf5\", None, (40,6656)),\n",
" (\"hdf5\", None, (160,1664))]:\n",
" key = Key(*k)\n",
" results[key] = bench(key)\n",
" print(key, results[key])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "vocal-genome",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Key(format='hdf5', compression='gzip', chunk=None) Res(compression=39.78027719788861, read=477.12773076606135, write=189.29843477263105)\n",
"Key(format='hdf5', compression='gzip', chunk=(1, 1664)) Res(compression=26.602934295946124, read=317.32810699302485, write=170.03321179279254)\n",
"Key(format='hdf5', compression='gzip', chunk=(1, 6656)) Res(compression=34.53257668982696, read=446.63243669626235, write=230.92592586648894)\n",
"Key(format='hdf5', compression='gzip', chunk=(40, 6656)) Res(compression=38.81519874522585, read=530.0937871707857, write=187.2893327456712)\n",
"Key(format='hdf5', compression='gzip', chunk=(160, 1664)) Res(compression=39.37773055496157, read=518.6832108881214, write=187.79710544837886)\n"
]
}
],
"source": [
"for k in [(\"hdf5\", \"gzip\", None),\n",
" (\"hdf5\", \"gzip\", (1,1664)),\n",
" (\"hdf5\", \"gzip\", (1,6656)),\n",
" (\"hdf5\", \"gzip\", (40,6656)),\n",
" (\"hdf5\", \"gzip\", (160,1664))]:\n",
" key = Key(*k)\n",
" results[key] = bench(key)\n",
" print(key, results[key])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "conditional-colombia",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Key(format='hdf5', compression='bitshuffle', chunk=None) Res(compression=34.48097650060628, read=1175.538050913075, write=1132.9698839668529)\n",
"Key(format='hdf5', compression='bitshuffle', chunk=(1, 1664)) Res(compression=25.891379581397512, read=290.720847721375, write=226.63448569650572)\n",
"Key(format='hdf5', compression='bitshuffle', chunk=(1, 6656)) Res(compression=31.35532673245106, read=581.3902571212135, write=543.2917022423106)\n",
"Key(format='hdf5', compression='bitshuffle', chunk=(40, 6656)) Res(compression=34.34958110991905, read=2446.856758234593, write=1461.2653063086007)\n",
"Key(format='hdf5', compression='bitshuffle', chunk=(160, 1664)) Res(compression=34.36789580388631, read=1901.9301071433877, write=1569.7570068929156)\n"
]
}
],
"source": [
"for k in [(\"hdf5\", \"bitshuffle\", None),\n",
" (\"hdf5\", \"bitshuffle\", (1,1664)),\n",
" (\"hdf5\", \"bitshuffle\", (1,6656)),\n",
" (\"hdf5\", \"bitshuffle\", (40,6656)),\n",
" (\"hdf5\", \"bitshuffle\", (160,1664))]:\n",
" key = Key(*k)\n",
" results[key] = bench(key)\n",
" print(key, results[key])"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "balanced-sheep",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Format | Compressor | chunks | Compression | Read (MB/s) | Write (MB/s)\n",
"numpy | None | None | 1.000 | 4557.369 | 1591.173\n",
"hdf5 | None | None | 1.000 | 4740.710 | 1590.856\n",
"hdf5 | None | (1, 1664) | 0.993 | 1217.294 | 625.566\n",
"hdf5 | None | (1, 6656) | 0.998 | 2576.134 | 987.340\n",
"hdf5 | None | (40, 6656) | 1.000 | 4584.856 | 1400.538\n",
"hdf5 | None | (160, 1664) | 1.000 | 3825.365 | 1374.631\n",
"hdf5 | gzip | None | 39.780 | 477.128 | 189.298\n",
"hdf5 | gzip | (1, 1664) | 26.603 | 317.328 | 170.033\n",
"hdf5 | gzip | (1, 6656) | 34.533 | 446.632 | 230.926\n",
"hdf5 | gzip | (40, 6656) | 38.815 | 530.094 | 187.289\n",
"hdf5 | gzip | (160, 1664) | 39.378 | 518.683 | 187.797\n",
"hdf5 | bitshuffle | None | 34.481 | 1175.538 | 1132.970\n",
"hdf5 | bitshuffle | (1, 1664) | 25.891 | 290.721 | 226.634\n",
"hdf5 | bitshuffle | (1, 6656) | 31.355 | 581.390 | 543.292\n",
"hdf5 | bitshuffle | (40, 6656) | 34.350 | 2446.857 | 1461.265\n",
"hdf5 | bitshuffle | (160, 1664) | 34.368 | 1901.930 | 1569.757\n"
]
}
],
"source": [
"print(f\"{'Format':10s} | {'Compressor':10s} | {'chunks':11s} | {'Compression':11s} | {'Read (MB/s)':11s} | {'Write (MB/s)':10s}\")\n",
"for k, v in results.items():\n",
" print(f\"{k.format:10s} | {str(k.compression):10s} | {str(k.chunk) or '-':11s} | {v.compression:11.3f} | {v.read:11.3f} | {v.write:10.3f}\")"
]
},
{
"cell_type": "markdown",
"id": "whole-thesaurus",
"metadata": {},
"source": [
"# Auto-chunking function\n",
"First, the default implementation:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "correct-cambridge",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 1664)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def guess_chunk(scan_shape, detector_shape):\n",
" \"\"\"Simplified form of h5py._hl.filters.guess_chunk\"\"\"\n",
" if detector_shape:\n",
" return tuple(1 for _ in scan_shape) + tuple(\n",
" min(n, max(n // 4, 256)) if n else 256 for n in detector_shape)\n",
" else:\n",
" return tuple(min(n, 256) if n else 256 for n in scan_shape)\n",
"\n",
"guess_chunk((390000,), (6656,))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "native-energy",
"metadata": {},
"outputs": [],
"source": [
"from math import ceil\n",
"def auto_chunk(scan_shape, detector_shape, \n",
" split=4, block=1<<20, dtype=numpy.int32):\n",
" \"\"\"Try to guess the optimal chunk size with those constrains:\n",
" * Always split any dimension for partial accesses\n",
" * close to the block size (approximative when compressing)\n",
" \n",
" :param scan_shape: tuple with the dimentions of the scan\n",
" :param detector_shape: tuple with the dimentions of every data-point\n",
" :param split: split the inner-most dimentions at least by this value\n",
" :param block: target size (in bytes) of chunk (uncompressed)\n",
" :param dtype: data type to get the size of every element\n",
" :return: tuple with the optimal chunk size\n",
" \"\"\"\n",
" if detector_shape:\n",
" shape = tuple(scan_shape) + tuple(detector_shape)\n",
" else:\n",
" shape = tuple(scan_shape)\n",
" itemsize = numpy.dtype(dtype).itemsize\n",
" size = numpy.prod(shape)\n",
" nbytes = size * itemsize\n",
" if nbytes > block:\n",
" target = block//itemsize\n",
" size = 1\n",
" lst = []\n",
" for i in shape[-1::-1]:\n",
" if size >=target:\n",
" j = 1\n",
" elif i * size / split > 2.0 * target:\n",
" j = int(ceil(target/size))\n",
" else:\n",
" j = int(ceil(i/split))\n",
" lst.insert(0, j)\n",
" size *= j\n",
" chunks = tuple(lst)\n",
" else:\n",
" chunks = shape\n",
" return chunks"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "suburban-minnesota",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(158, 1664)\n",
"(5, 250, 250)\n",
"(1, 512, 512)\n",
"(10, 20)\n"
]
}
],
"source": [
"print(auto_chunk((390000,), (6656,)))\n",
"print(auto_chunk((100,), (1000, 1000)))\n",
"print(auto_chunk((100,), (2048, 2048)))\n",
"print(auto_chunk((10,), (20,)))"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "digital-stock",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(40, 6656)\n",
"(1, 263, 1000)\n",
"(1, 128, 2048)\n"
]
}
],
"source": [
"print(auto_chunk((390000,), (6656,), split=1))\n",
"print(auto_chunk((100,), (1000, 1000), split=1))\n",
"print(auto_chunk((100,), (2048, 2048), split=1))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9 (local venv)",
"language": "python",
"name": "py39-venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment