Last active
June 10, 2026 14:00
-
-
Save FilipDominec/560bf8a72e4c6fd79156d25434c66291 to your computer and use it in GitHub Desktop.
TXT frames to NPZ or VTK data cube
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| #-*- coding: utf-8 -*- | |
| """ | |
| Quick conversion from Pixet frames to a *.vtk file for viewing in Mayavi2 or Paraview. | |
| From a directory of 2D ASCII arrays like | |
| frames_100.txt frames_110.txt frames_12.txt frames_23.txt frames_34.txt frames_44.txt | |
| frames_54.txt frames_64.txt frames_74.txt frames_84.txt frames_94.txt (...) | |
| generates a single 3D numpy array (which can be easily processed by modifying the code here), and then saves it as a | |
| scalar VTK array using the `evtk` module (https://pypi.org/project/pyevtk/). | |
| Note that naive alphabetical sorting of the input files would scramble the energy frames; this is addressed by the | |
| `split_alpha_numeric` function below. This code was taken from https://github.com/FilipDominec/nihilnovi, which can also | |
| be used for 3D data slicing and advanced processing. | |
| Based on nihilnovi code, tailored for Advacam colleagues by | |
| (c) 2017-2026 Filip Dominec dominecf@fzu.cz | |
| """ | |
| import argparse | |
| import logging | |
| import numpy as np | |
| import os | |
| from pathlib import Path | |
| import re | |
| import sys | |
| import time | |
| np.set_printoptions(suppress=True, linewidth=1000) | |
def split_alpha_numeric(instring):
    """
    Split a string into interleaved (text, number) pairs, suitable as a natural-sorting key.

    Usual sorting algorithms do not care much about the numerical values embedded in a string.
    This means e.g. that 'temperature-12' may wrongly come after 'temperature12', or '12200fish' will in
    most cases come after '0.123E+05fish'. For scientific data manipulation, this is not satisfactory.

    A regular expression finds every number (optional sign, decimal part and exponent) in `instring`.
    Each number is converted to float and paired with the non-numeric text that precedes it. Any text
    remaining after the last number is returned as a final pair padded with 0. Lists of such pairs
    compare correctly, so this function can be passed directly as `key=` to `sorted()`.

    When a number immediately follows another number, its first character (a sign or a dot) is
    treated as a separator rather than as part of the value; this keeps dates like "YYYY-MM-DD"
    from being read as negative numbers. A number at the very start of the string has no preceding
    number, so it is kept whole, including its sign.

    >>> split_alpha_numeric('frames_12.txt')
    [('frames_', 12.0), ('txt', 0)]
    >>> split_alpha_numeric('2020-01-02')
    [('', 2020.0), ('-', 1.0), ('-', 2.0)]
    >>> sorted(['frames_100.txt', 'frames_12.txt'], key=split_alpha_numeric)
    ['frames_12.txt', 'frames_100.txt']
    """
    def generate_numeric_pairs(text):
        previous_end = 0
        for match in re.finditer(r'[-+]?(((\d+(\.\d*)?)|(\.\d+))([eE][+-]?\d+)?)', text):
            start, end = match.span()
            # Only strip the leading sign/dot when a previous number ends exactly here. Without the
            # `start > 0` guard, a number at the beginning of the string would lose its first digit.
            if start == previous_end and start > 0:
                start += 1
            yield text[previous_end:start], float(text[start:end])  # non-numeric part and numeric part
            previous_end = end
        if previous_end < len(text):
            yield text[previous_end:], 0  # do not forget the last non-numeric part, pad with zero

    return list(generate_numeric_pairs(instring))
def extract_stringpart_that_differs(str_list):
    """
    Find the first chunk in which a list of similarly structured strings differs.

    Every string is cut into number-like and letter-like chunks; the chunks are then compared
    position by position across all strings. The first position where at least one string
    differs from the first string is returned as a tuple holding that chunk of every string.
    Returns None if no such position exists.

    >>> extract_stringpart_that_differs(['10.3K380.TIF', '10.3K400.TIF', '10.3K420.TIF',])
    ('380', '400', '420')
    """
    def split_string_alpha_numeric(name):
        """
        Cut a string into the minimum number of chunks, so that each chunk either
            1) contains number-like characters only [code point below 63, i.e. before "?"], or,
            2) contains letter-like characters only [code point above 63].
        The "?" character itself never triggers a cut. Whitespace separates chunks and is dropped.
        The last dot is replaced by a separator, as it usually precedes the file name extension.
        Chunks are returned as a list of strings (no numeric conversion).

        >>> split_string_alpha_numeric('10.3K380.TIF')
        ['10.3', 'K', '380', 'TIF']
        >>> split_string_alpha_numeric('10.3K3_80.TIF')
        ['10.3', 'K', '3', '_', '80', 'TIF']
        """
        pieces = []
        # Pair each character with its successor; a trailing '_' stands in after the last one
        for current, following in zip(name, name[1:] + '_'):
            pieces.append(current)
            if (ord(current) - 63) * (ord(following) - 63) < 0:
                pieces.append(' ')  # character class changes here -> chunk boundary
        spaced = ''.join(pieces)
        head, last_dot, tail = spaced.rpartition('.')
        if last_dot:
            spaced = head + ' ' + tail
        return spaced.split()

    str_list = list(str_list)
    assert len(str_list)>1
    assert isinstance(str_list[0], str)

    chunked_names = [split_string_alpha_numeric(name) for name in str_list]
    for column in zip(*chunked_names):
        reference = column[0]
        if any(field != reference for field in column[1:]):
            return column
    return None  # no differing chunk found
def main_function(input_files, output_file, clip_hot_pixels=True):
    """
    Stack 2D ASCII frames into a single 3D array and save it as an NPZ, HDF5 or VTK file.

    Parameters
    ----------
    input_files : iterable of str
        Paths of text files readable by `numpy.loadtxt`. They are ordered with
        `split_alpha_numeric`, so numbers embedded in the names sort by value.
        All frames must have the same shape.
    output_file : str
        Output path. Its extension selects the format: '.npz', '.h5'/'.hdf' or '.vtr'.
    clip_hot_pixels : bool, optional
        If true, values are clipped to the range from 0 to 3x the 0.999 quantile, which is
        estimated from every third sample along each axis.

    Raises
    ------
    RuntimeError
        If the extension of `output_file` is not supported.
    ValueError
        If `input_files` is empty.
    """
    # Fetched locally so that the function also works when this module is imported
    log = logging.getLogger(__name__)

    # Fail early, before any (possibly slow) frame loading
    supported_extensions = ('.npz', '.h5', '.hdf', '.vtr')
    if not output_file.endswith(supported_extensions):
        raise RuntimeError('output extension can be *.npz, *.h5, *.hdf or *.vtr')

    sorted_input_files = sorted(input_files, key=split_alpha_numeric)  # sort correctly
    if not sorted_input_files:
        raise ValueError('no input files given')

    frames3d = None
    for n, file in enumerate(sorted_input_files):
        frame = np.loadtxt(file, ndmin=2)  # ndmin=2 keeps one-row or one-column frames two-dimensional
        if frames3d is None:
            frames3d = np.empty((len(sorted_input_files), frame.shape[0], frame.shape[1]))
        frames3d[n] = frame

    # Note the frames are stored from index 0, even if their respective energies start from nonzero values
    axes_names = ('Energy_keV', 'Y_pixel', 'X_pixel')
    axes_coordinates = [
        _energy_axis_from_names(sorted_input_files, log),
        np.arange(frames3d.shape[1]),
        np.arange(frames3d.shape[2])
    ]

    if clip_hot_pixels:
        clip_quant, clip_factor = .999, 3
        log.debug(f'Optional: clipping hot pixels (all with values over {clip_factor}× quantile {clip_quant:.3f} )')
        # The quantile is estimated on a subsampled copy of the cube
        clip_val = clip_factor * np.nanquantile(frames3d[::3, ::3, ::3].astype(np.float32), q=clip_quant)
        frames3d = np.clip(frames3d, 0, clip_val)

    log.debug(f'Saving output_file {output_file}')
    if output_file.endswith('.npz'):
        # The following generates a NPZ file with easy to read fields like:
        #   frames___49x256x256
        #   frames___axis0___Energy_keV
        #   frames___axis1___Y_pixel
        #   frames___axis2___X_pixel
        npz_output_dict = {('frames___' + 'x'.join(str(s) for s in frames3d.shape)): frames3d}
        for n, (axis_name, axis_coordinates) in enumerate(zip(axes_names, axes_coordinates)):
            npz_output_dict[f'frames___axis{n}___{axis_name}'] = axis_coordinates
        np.savez_compressed(
            output_file.removesuffix('.npz'),  # numpy appends the '.npz' extension itself
            **npz_output_dict
        )
    elif output_file.endswith(('.h5', '.hdf')):
        import h5py
        with h5py.File(output_file, 'w') as f:
            f.create_dataset('frames', data=frames3d)  # Save the array
    else:  # '.vtr', guaranteed by the extension check above
        ## Solution with https://pypi.org/project/pyevtk/
        from pyevtk.hl import gridToVTK
        z, y, x = (np.array(ax) for ax in axes_coordinates)
        # gridToVTK expects the path without extension and appends '.vtr' itself
        gridToVTK(output_file.removesuffix('.vtr'), z, y, x, pointData={'frames': frames3d})
    log.debug("done.")


def _energy_axis_from_names(sorted_files, log):
    """Return one float per file, taken from the differing part of the names, or frame indices as a fallback."""
    differing_fields = None
    if len(sorted_files) > 1:  # the helper below requires at least two names
        differing_fields = extract_stringpart_that_differs(sorted_files)
    if differing_fields is not None:
        try:
            return [float(field) for field in differing_fields]
        except ValueError:
            pass  # the differing part is not a number; fall through to indices
    log.warning('Could not extract numeric energies from the file names, using frame indices instead')
    return [float(index) for index in range(len(sorted_files))]
if __name__ == '__main__':
    ## Command-line arguments should roughly match arguments of the main function
    parser = argparse.ArgumentParser(prog = ' ', description = __doc__)
    pg_general = parser.add_argument_group('General options')
    pg_general.add_argument('input_files', type=str, nargs='+',
            help='Files to process')
    # The default needs an extension, because main_function selects the output format from it
    pg_general.add_argument('-o', '--output_file', type=str, default='merged_frames.npz',
            help='output path; its extension selects the format (*.npz, *.h5, *.hdf or *.vtr), '
                 'defaults to merged_frames.npz')
    # Restricting the choices gives a proper usage error instead of a KeyError traceback
    pg_general.add_argument('-v', '--verbose', type=int, default=1, choices=(0, 1, 2),
            help='defaults to 1 (INFO messages), can be set to 0 (SILENT) or 2 (DEBUG)')
    args = parser.parse_args()

    ## Preprocess the user arguments
    logging.basicConfig(level={0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}[args.verbose])
    log = logging.getLogger(__name__)

    main_function(args.input_files, args.output_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment