Skip to content

Instantly share code, notes, and snippets.

@FilipDominec
Last active June 10, 2026 14:00
Show Gist options
  • Select an option

  • Save FilipDominec/560bf8a72e4c6fd79156d25434c66291 to your computer and use it in GitHub Desktop.

Select an option

Save FilipDominec/560bf8a72e4c6fd79156d25434c66291 to your computer and use it in GitHub Desktop.
TXT frames to NPZ or VTK data cube
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
"""
Quick conversion from Pixet frames to a *.npz, *.h5/*.hdf or *.vtr file; the latter can be viewed in Mayavi2 or Paraview.
From a directory of 2D ASCII arrays like
frames_100.txt frames_110.txt frames_12.txt frames_23.txt frames_34.txt frames_44.txt
frames_54.txt frames_64.txt frames_74.txt frames_84.txt frames_94.txt (...)
generates a single 3D numpy array (which can be easily processed by modifying the code here), and then saves it as a
scalar VTK array using the `evtk` module (https://pypi.org/project/pyevtk/).
Note that naive alphabetical sorting of the input files would scramble the energy frames; this is addressed by the
`split_alpha_numeric` function below. This code was taken from https://github.com/FilipDominec/nihilnovi, which can also
be used for 3D data slicing and advanced processing.
Based on nihilnovi code, tailored for Advacam colleagues by
(c) 2017-2026 Filip Dominec dominecf@fzu.cz
"""
import argparse
import logging
import numpy as np
import os
from pathlib import Path
import re
import sys
import time
np.set_printoptions(suppress=True, linewidth=1000)
def split_alpha_numeric(instring):
    """
    Split a string into interleaved non-numeric and numeric parts; intended as a natural-sort key.

    Usual sorting algorithms do not care much about the numerical values embedded in a string.
    This means e.g. that 'temperature-12' may wrongly come after 'temperature12', or '12200fish' will in
    most cases come after '0.123E+05fish'. For scientific data manipulation, this is not satisfactory.

    A regular expression finds every number-like section (optional sign, decimal point and exponent).
    The string is returned as a list of `(text, number)` tuples, where `text` is the non-numeric part
    preceding the number and `number` is a true float. A trailing non-numeric part is paired with 0.
    Such lists compare in the expected "natural" order, so the function can be passed as `key=` to
    `sorted()` or `list.sort()`.

    When a number immediately follows another number, its first character (typically a hyphen, as in a
    date like "YYYY-MM-DD") is treated as a separator and not as a part of the number. A sign at the
    very beginning of the string has no number before it and is therefore kept.

    >>> split_alpha_numeric('frames_12.txt')
    [('frames_', 12.0), ('txt', 0)]
    >>> sorted(['frames_100.txt', 'frames_12.txt', 'frames_23.txt'], key=split_alpha_numeric)
    ['frames_12.txt', 'frames_23.txt', 'frames_100.txt']
    """
    def generate_numeric_pairs(instring):
        span0, span2 = 0, 0
        for match in re.finditer(r'[-+]?(((\d+(\.\d*)?)|(\.\d+))([eE][+-]?\d+)?)', instring):
            span1, span2 = match.span()
            # Strip the leading separator only if this number directly follows a previous number.
            # `span0 > 0` holds only after at least one match; without this check, a string starting
            # with a number would lose its first digit (or crash on float('') for a lone digit).
            if span1 == span0 and span0 > 0:
                span1 += 1
            yield instring[span0:span1], float(instring[span1:span2])   ## non-numeric part and numeric part
            span0 = span2
        if span2 < len(instring):
            yield instring[span2:], 0   ## do not forget the last non-numeric part, pad with zero

    return list(generate_numeric_pairs(instring))
def extract_stringpart_that_differs(str_list):
    """
    Find the part in which a group of similar strings differs.

    Every string is cut into number-like and letter-like chunks (see the nested helper). The chunks
    are then compared position by position, and the first position where not all strings agree is
    returned as a tuple holding that chunk from each string, in the input order. If no such position
    exists (within the shortest chunk list), None is returned.

    The input may be any iterable with at least two items; the first item must be a `str`.

    >>> extract_stringpart_that_differs(['10.3K380.TIF', '10.3K400.TIF', '10.3K420.TIF',])
    ('380', '400', '420')
    """
    def split_string_alpha_numeric(name):
        """
        Cut a string into the minimum number of chunks, so that each chunk consists either of
            1) number-like characters [code point less than 63, e.g. digits, '.', '-', '+'], or,
            2) letter-like characters [code point more than 63, e.g. letters and the underscore].
        The last dot in the string is turned into a separator, as it usually precedes the file name
        extension. Any whitespace acts as a separator, too, and is dropped.
        The chunks are returned as a list of strings (no numeric conversion).

        >>> split_string_alpha_numeric('10.3K380.TIF')
        ['10.3', 'K', '380', 'TIF']
        >>> split_string_alpha_numeric('10.3K3_80.TIF')
        ['10.3', 'K', '3', '_', '80', 'TIF']
        """
        spaced_chars = []
        # Pair every character with its successor; the last one is paired with a padding '_'
        for this_char, next_char in zip(name, name[1:] + '_'):
            spaced_chars.append(this_char)
            # A negative product means the two characters lie on opposite sides of code point 63
            if (ord(next_char) - 63) * (ord(this_char) - 63) < 0:
                spaced_chars.append(' ')
        before_dot, last_dot, after_dot = ''.join(spaced_chars).rpartition('.')
        if last_dot:
            return (before_dot + ' ' + after_dot).split()
        return after_dot.split()    # no dot at all: rpartition leaves the whole text in its last field

    names = list(str_list)
    assert len(names) > 1
    assert isinstance(names[0], str)

    chunked_names = [split_string_alpha_numeric(name) for name in names]
    for column in zip(*chunked_names):
        reference = column[0]
        if any(field != reference for field in column[1:]):
            return column
    return None     # i.e. no differing chunk was found
def main_function(input_files, output_file, clip_hot_pixels=True):
    """
    Merge 2D ASCII frames into a single 3D array and save it to one output file.

    Parameters:
        input_files: iterable of paths (str or path-like) to text files readable by `np.loadtxt`,
            each holding one 2D frame. All frames must have the same shape. The file names must
            differ in a numeric part, which is used as the coordinate of the first (energy) axis.
        output_file: output path (str or path-like); its extension selects the format:
            *.npz (numpy archive with the data cube and its three axes),
            *.h5 or *.hdf (HDF5 file with a single 'frames' dataset; requires `h5py`),
            *.vtr (VTK rectilinear grid; requires `pyevtk`).
        clip_hot_pixels: if True, values are clipped to the range from 0 to three times the 0.999
            quantile of the (subsampled) data.

    Raises:
        RuntimeError: if the extension of `output_file` is not supported.
        ValueError: if fewer than two input files are given, if their names do not differ in any
            part, or if the differing part cannot be converted to a number.
    """
    # Local logger, so that the function also works when this file is imported as a module
    log = logging.getLogger(__name__)

    # Validate the requested format first, so that a typo does not waste time on loading all frames
    output_file = str(output_file)
    if not output_file.endswith(('.npz', '.h5', '.hdf', '.vtr')):
        raise RuntimeError('output extension can be *.npz, *.h5, *.hdf or *.vtr')

    sorted_input_files = sorted((str(path) for path in input_files), key=split_alpha_numeric)   # sort correctly
    if len(sorted_input_files) < 2:
        raise ValueError('at least two input files are needed to determine the energy axis')
    energy_strings = extract_stringpart_that_differs(sorted_input_files)
    if energy_strings is None:
        raise ValueError('input file names do not differ, cannot determine the energy axis')
    energies = [float(energy_string) for energy_string in energy_strings]

    frames3d = None
    for n, file in enumerate(sorted_input_files):
        frame = np.loadtxt(file)
        if frames3d is None:    # the cube can be allocated only once the frame shape is known
            frames3d = np.empty((len(sorted_input_files), frame.shape[0], frame.shape[1]))
        frames3d[n] = frame

    # Note the frames are stored from index 0, even if their respective energies start from nonzero values
    axes_names = ('Energy_keV', 'Y_pixel', 'X_pixel')
    axes_coordinates = [
        energies,
        np.arange(frames3d.shape[1]),
        np.arange(frames3d.shape[2]),
    ]

    if clip_hot_pixels:
        clip_quant, clip_factor = .999, 3
        log.debug(f'Optional: clipping hot pixels (all with values over {clip_factor}× quantile {clip_quant:.3f} )')
        # The quantile is estimated from every third sample along each axis only
        clip_val = clip_factor * np.nanquantile(frames3d[::3,::3,::3].astype(np.float32), q=clip_quant)
        frames3d = np.clip(frames3d, 0, clip_val)

    log.debug(f'Saving output_file {output_file}')
    if output_file.endswith('.npz'):
        # The following generates a NPZ file with easy to read fields like:
        #frames___49x256x256
        #frames___axis0___Energy_keV
        #frames___axis1___Y_pixel
        #frames___axis2___X_pixel
        npz_output_dict = {('frames___' + 'x'.join(str(s) for s in frames3d.shape)): frames3d}
        for n, (axis_name, axis_coordinates) in enumerate(zip(axes_names, axes_coordinates)):
            npz_output_dict[f'frames___axis{n}___{axis_name}'] = axis_coordinates
        np.savez_compressed(output_file, **npz_output_dict)
    elif output_file.endswith(('.h5', '.hdf')):
        import h5py
        with h5py.File(output_file, 'w') as f:
            f.create_dataset('frames', data=frames3d)   # Save the array
    else:   # '.vtr', the only remaining supported extension
        ## Solution with https://pypi.org/project/pyevtk/
        from pyevtk.hl import gridToVTK
        z, y, x = (np.array(ax) for ax in axes_coordinates)
        # gridToVTK expects the path without extension and appends '.vtr' on its own
        gridToVTK(output_file.removesuffix('.vtr'), z, y, x, pointData={'frames': frames3d})
    log.debug("done.")
if __name__ == '__main__':
    ## Command-line arguments should roughly match arguments of the main function
    parser = argparse.ArgumentParser(prog = ' ', description = __doc__)
    pg_general = parser.add_argument_group('General options')
    pg_general.add_argument('input_files', type=str, nargs='+',
            help='Files to process')
    # The default must carry a supported extension, otherwise a run without -o could never succeed
    pg_general.add_argument('-o', '--output_file', type=str, default='merged_frames.npz',
            help='output path; its extension selects the format (*.npz, *.h5, *.hdf or *.vtr), '
                 'defaults to %(default)s')
    # `choices` makes argparse reject unsupported levels with a usage message
    pg_general.add_argument('-v', '--verbose', type=int, default=1, choices=(0, 1, 2),
            help='defaults to 1 (INFO messages), can be set to 0 (SILENT) or 2 (DEBUG)')
    #pg_general.add_argument('-b', '--binary_option', action='store_true') # i.e. defaults to False
    args = parser.parse_args()

    ## Preprocess the user arguments
    logging.basicConfig(level={0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}[args.verbose])
    log = logging.getLogger(__name__)

    main_function(args.input_files, args.output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment