FilipDominec · June 10, 2026 14:00
diff --git a/txt2npz_vtk.py b/txt2npz_vtk.py
 #!/usr/bin/env python3  
 #-*- coding: utf-8 -*-

 """
 Quick conversion from Pixet frames to a single *.npz, *.h5 or *.vtr file; the latter can be viewed in Mayavi2 or Paraview.

 From a directory of 2D ASCII arrays like

    frames_100.txt  frames_110.txt  frames_12.txt  frames_23.txt  frames_34.txt  frames_44.txt  
    frames_54.txt  frames_64.txt  frames_74.txt  frames_84.txt  frames_94.txt (...)

 generates a single 3D numpy array (which can be easily processed by modifying the code here), and then saves it as a
 scalar VTK array using the `evtk` module (https://pypi.org/project/pyevtk/).

 Note that naive alphabetical sorting of the input files would scramble the energy frames; this is addressed by the
 `split_alpha_numeric` function below. This code was taken from https://github.com/FilipDominec/nihilnovi, which can also
 be used for 3D data slicing and advanced processing.

 Based on nihilnovi code, tailored for Advacam colleagues by 
 (c) 2017-2026 Filip Dominec dominecf@fzu.cz 
 """

 import argparse
 import logging
 import numpy as np
 import os
 from pathlib import Path
 import re
 import sys
 import time
 # Print arrays in fixed-point notation and break printed lines only after 1000 characters.
 np.set_printoptions(suppress=True, linewidth=1000)


 def split_alpha_numeric(instring):
    """
    Build a sort key that orders strings by the numeric values embedded in them.

    Usual sorting algorithms do not care much about the numerical values embedded in a string, so e.g.
    'frames_100.txt' comes before 'frames_12.txt'. This function uses a regular expression to split the string
    into interleaved non-numeric and numeric sections, the latter being converted to true float numbers.
    Using the result as the `key` of sorted() gives the proper order.

    A sign or dot that directly follows the previous number is treated as a separator, not as a part of the
    next number, so date-like strings "YYYY-MM-DD" are not read as containing negative numbers.

    Parameters:
        instring: the string (typically a file name) to be split.

    Returns:
        A list of (non_numeric_text, number) tuples. Trailing text without a number is paired with 0.
        An empty string gives an empty list.

    >>> sorted(['frames_100.txt', 'frames_12.txt'], key=split_alpha_numeric)
    ['frames_12.txt', 'frames_100.txt']
    >>> split_alpha_numeric('2020-01-02')
    [('', 2020.0), ('-', 1.0), ('-', 2.0)]
    """
    number_pattern = r'[-+]?(((\d+(\.\d*)?)|(\.\d+))([eE][+-]?\d+)?)'

    def generate_numeric_pairs(text):
        text_start, number_end = 0, 0
        for match in re.finditer(number_pattern, text):
            number_start, number_end = match.span()
            ## Strip the separator if a number directly follows a number (it is probably a date like "YYYY-MM-DD").
            ## This must not be done at the very start of the string: nothing precedes the number there, and
            ## stripping would cut off its first digit.
            if number_start > 0 and number_start == text_start:
                number_start += 1
            yield text[text_start:number_start], float(text[number_start:number_end])
            text_start = number_end
        if number_end < len(text):
            yield text[number_end:], 0      ## do not forget the last non-numeric part, pad with zero

    return list(generate_numeric_pairs(instring))


 def extract_stringpart_that_differs(str_list):
    """
    Find the chunk in which a group of similarly built strings differs.

    Every string is cut into number-like and letter-like chunks (see the inner helper). The chunks are then
    compared position by position, and the first position where not all strings agree is returned.

    Parameters:
        str_list: iterable of at least two strings (checked by assertions).

    Returns:
        Tuple holding the differing chunk of each string (unconverted, in input order), or None if no
        compared position differs.

    >>> extract_stringpart_that_differs(['10.3K380.TIF', '10.3K400.TIF', '10.3K420.TIF',])
    ('380', '400', '420')
    """
    def split_string_alpha_numeric(name):
        """
        Splits a string into chunks at every boundary between
        1) number-like characters [code point below 63: digits, dot, minus, slash, ...], and
        2) letter-like characters [code point above 63: letters, underscore, ...].
        A '?' (code point 63) never causes a split. Whitespace always separates chunks.
        The last dot is dropped, too, as it usually separates the file name extension.
        Chunks are returned in a list of strings (no conversion).
        >>> split_string_alpha_numeric('10.3K380.TIF')
        ['10.3', 'K', '380', 'TIF']
        >>> split_string_alpha_numeric('frames_12.txt')
        ['frames_', '12', 'txt']
        """
        spaced = []
        ## The appended '_' only gives the last character a (letter-like) successor to compare with
        for this_char, next_char in zip(name, name[1:] + '_'):
            spaced.append(this_char)
            if (ord(next_char) - 63) * (ord(this_char) - 63) < 0:
                spaced.append(' ')
        head, last_dot, tail = ''.join(spaced).rpartition('.')
        if last_dot:
            return (head + ' ' + tail).split()
        return tail.split()

    names = list(str_list)
    assert len(names) > 1
    assert isinstance(names[0], str)
    chunked_names = [split_string_alpha_numeric(name) for name in names]
    for column in zip(*chunked_names):
        reference = column[0]
        if any(field != reference for field in column[1:]):
            return column
    return None # i.e. all compared chunks are the same


 def main_function(input_files, output_file, clip_hot_pixels=True):
    """
    Merge 2D ASCII frames into one 3D array and save it in the format selected by the output extension.

    Parameters:
        input_files: paths of the text files, each holding one 2D frame. They are sorted by the numbers
            embedded in their names; the name chunk that differs among them is used as the energy
            coordinate. At least two files are needed (the chunk extraction asserts this).
        output_file: output path; its extension selects the format ('.npz', '.h5', '.hdf' or '.vtr').
        clip_hot_pixels: if True, all values are limited to the range from 0 to 3x the 0.999 quantile
            (the quantile is estimated from every third sample along each axis).

    Raises:
        RuntimeError: if the output extension is not supported (checked before any file is read).
        ValueError: if no input files are given, or if their names do not differ in any compared chunk.
    """
    ## Local logger, so that the function does not depend on a global that exists only when run as a script
    log = logging.getLogger(__name__)

    ## Check the requested format first, so that a wrong extension fails before the frames are loaded
    if not output_file.endswith(('.npz', '.h5', '.hdf', '.vtr')):
        raise RuntimeError('output extension can be *.npz, *.h5, *.hdf or *.vtr')

    sorted_input_files = sorted(input_files, key=split_alpha_numeric) # sort by embedded numbers
    if not sorted_input_files:
        raise ValueError('no input files given')

    energy_strings = extract_stringpart_that_differs(sorted_input_files)
    if energy_strings is None:
        raise ValueError('input file names do not differ, cannot determine the energy axis')

    frames3d = None
    for n, file in enumerate(sorted_input_files):
        frame = np.loadtxt(file, ndmin=2)   # ndmin=2 keeps a single-row file two-dimensional
        if frames3d is None:
            frames3d = np.empty((len(sorted_input_files),) + frame.shape)
        frames3d[n] = frame
        # Note the frames are stored from index 0, even if their respective energies start from nonzero values

    axes_names = ('Energy_keV', 'Y_pixel', 'X_pixel')
    axes_coordinates = [
         [float(Estring) for Estring in energy_strings],
         np.arange(frames3d.shape[1]),
         np.arange(frames3d.shape[2])
         ]

    if clip_hot_pixels:
        clip_quant, clip_factor = .999, 3
        log.debug(f'Optional: clipping hot pixels (all with values over {clip_factor}× quantile {clip_quant:.3f} )')
        clip_val = clip_factor * np.nanquantile(frames3d[::3,::3,::3].astype(np.float32), q=clip_quant)
        frames3d = np.clip(frames3d, 0, clip_val)

    log.debug(f'Saving output_file {output_file}')
    if output_file.endswith('.npz'):
        # The following generates a NPZ file with easy to read fields like:
                    #frames___49x256x256
                    #frames___axis0___Energy_keV
                    #frames___axis1___Y_pixel
                    #frames___axis2___X_pixel
        npz_output_dict = {('frames___' + 'x'.join(str(s) for s in frames3d.shape)):frames3d}
        for n, (axis_name, axis_coordinates) in enumerate(zip(axes_names, axes_coordinates)):
            npz_output_dict[f'frames___axis{n}___{axis_name}'] = axis_coordinates
        np.savez_compressed(output_file, **npz_output_dict)
    elif output_file.endswith(('.h5', '.hdf')):
        import h5py
        with h5py.File(output_file, 'w') as f:
            f.create_dataset('frames', data=frames3d)  # Save the array
    else:
        ## Only '.vtr' is left here; solution with https://pypi.org/project/pyevtk/
        from pyevtk.hl import gridToVTK
        z, y, x = (np.array(ax) for ax in axes_coordinates)
        ## gridToVTK expects the path without extension and appends '.vtr' itself
        gridToVTK(output_file.removesuffix('.vtr'), z, y, x, pointData={'frames': frames3d})

    log.debug("done.")


 if __name__ == '__main__':
    ## Command-line arguments should roughly match arguments of the main function
    parser = argparse.ArgumentParser(prog = ' ', description = __doc__)
    pg_general = parser.add_argument_group('General options')
    pg_general.add_argument('input_files', type=str, nargs='+',
            help='Files to process')
    ## The extension selects the output format, so the default has to carry one that main_function accepts
    pg_general.add_argument('-o', '--output_file', type=str, default='merged_frames.npz',
                        help='output path, its extension (.npz, .h5, .hdf or .vtr) selects the format; '
                             'defaults to merged_frames.npz')
    ## `choices` makes argparse reject other levels instead of failing on the dictionary lookup below
    pg_general.add_argument('-v', '--verbose', type=int, default=1, choices=(0, 1, 2),
                        help='defaults to 1 (INFO messages), can be set to 0 (warnings only) or 2 (DEBUG)')
    #pg_general.add_argument('-b', '--binary_option', action='store_true') # i.e. defaults to False
    args = parser.parse_args()

    ## Preprocess the user arguments
    logging.basicConfig(level={0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}[args.verbose])
    log = logging.getLogger(__name__)   # module-level logger, main_function may refer to it

    main_function(args.input_files, args.output_file)
	#!/usr/bin/env python3
	#-*- coding: utf-8 -*-

	"""
	Quick conversion from Pixet frames to a single *.npz, *.h5 or *.vtr file; the latter can be viewed in Mayavi2 or Paraview.

	From a directory of 2D ASCII arrays like

	frames_100.txt frames_110.txt frames_12.txt frames_23.txt frames_34.txt frames_44.txt
	frames_54.txt frames_64.txt frames_74.txt frames_84.txt frames_94.txt (...)

	generates a single 3D numpy array (which can be easily processed by modifying the code here), and then saves it as a
	scalar VTK array using the `evtk` module (https://pypi.org/project/pyevtk/).

	Note that naive alphabetical sorting of the input files would scramble the energy frames; this is addressed by the
	`split_alpha_numeric` function below. This code was taken from https://github.com/FilipDominec/nihilnovi, which can also
	be used for 3D data slicing and advanced processing.

	Based on nihilnovi code, tailored for Advacam colleagues by
	(c) 2017-2026 Filip Dominec dominecf@fzu.cz
	"""

	import argparse
	import logging
	import numpy as np
	import os
	from pathlib import Path
	import re
	import sys
	import time
	# Print arrays in fixed-point notation and break printed lines only after 1000 characters.
	np.set_printoptions(suppress=True, linewidth=1000)


	def split_alpha_numeric(instring):
	    """
	    Build a sort key that orders strings by the numeric values embedded in them.

	    Usual sorting algorithms do not care much about the numerical values embedded in a string, so e.g.
	    'frames_100.txt' comes before 'frames_12.txt'. This function uses a regular expression to split the string
	    into interleaved non-numeric and numeric sections, the latter being converted to true float numbers.
	    Using the result as the `key` of sorted() gives the proper order.

	    A sign or dot that directly follows the previous number is treated as a separator, not as a part of the
	    next number, so date-like strings "YYYY-MM-DD" are not read as containing negative numbers.

	    Parameters:
	        instring: the string (typically a file name) to be split.

	    Returns:
	        A list of (non_numeric_text, number) tuples. Trailing text without a number is paired with 0.
	        An empty string gives an empty list.

	    >>> sorted(['frames_100.txt', 'frames_12.txt'], key=split_alpha_numeric)
	    ['frames_12.txt', 'frames_100.txt']
	    >>> split_alpha_numeric('2020-01-02')
	    [('', 2020.0), ('-', 1.0), ('-', 2.0)]
	    """
	    ## The `|` is a plain alternation between "digits with optional fraction" and "fraction only"
	    number_pattern = r'[-+]?(((\d+(\.\d*)?)|(\.\d+))([eE][+-]?\d+)?)'

	    def generate_numeric_pairs(text):
	        text_start, number_end = 0, 0
	        for match in re.finditer(number_pattern, text):
	            number_start, number_end = match.span()
	            ## Strip the separator if a number directly follows a number (it is probably a date like "YYYY-MM-DD").
	            ## This must not be done at the very start of the string: nothing precedes the number there, and
	            ## stripping would cut off its first digit.
	            if number_start > 0 and number_start == text_start:
	                number_start += 1
	            yield text[text_start:number_start], float(text[number_start:number_end])
	            text_start = number_end
	        if number_end < len(text):
	            yield text[number_end:], 0      ## do not forget the last non-numeric part, pad with zero

	    return list(generate_numeric_pairs(instring))


	def extract_stringpart_that_differs(str_list):
	    """
	    Find the chunk in which a group of similarly built strings differs.

	    Every string is cut into number-like and letter-like chunks (see the inner helper). The chunks are then
	    compared position by position, and the first position where not all strings agree is returned.

	    Parameters:
	        str_list: iterable of at least two strings (checked by assertions).

	    Returns:
	        Tuple holding the differing chunk of each string (unconverted, in input order), or None if no
	        compared position differs.

	    >>> extract_stringpart_that_differs(['10.3K380.TIF', '10.3K400.TIF', '10.3K420.TIF',])
	    ('380', '400', '420')
	    """
	    def split_string_alpha_numeric(name):
	        """
	        Splits a string into chunks at every boundary between
	        1) number-like characters [code point below 63: digits, dot, minus, slash, ...], and
	        2) letter-like characters [code point above 63: letters, underscore, ...].
	        A '?' (code point 63) never causes a split. Whitespace always separates chunks.
	        The last dot is dropped, too, as it usually separates the file name extension.
	        Chunks are returned in a list of strings (no conversion).
	        >>> split_string_alpha_numeric('10.3K380.TIF')
	        ['10.3', 'K', '380', 'TIF']
	        >>> split_string_alpha_numeric('frames_12.txt')
	        ['frames_', '12', 'txt']
	        """
	        spaced = []
	        ## The appended '_' only gives the last character a (letter-like) successor to compare with
	        for this_char, next_char in zip(name, name[1:] + '_'):
	            spaced.append(this_char)
	            if (ord(next_char) - 63) * (ord(this_char) - 63) < 0:
	                spaced.append(' ')
	        head, last_dot, tail = ''.join(spaced).rpartition('.')
	        if last_dot:
	            return (head + ' ' + tail).split()
	        return tail.split()

	    names = list(str_list)
	    assert len(names) > 1
	    assert isinstance(names[0], str)
	    chunked_names = [split_string_alpha_numeric(name) for name in names]
	    for column in zip(*chunked_names):
	        reference = column[0]
	        if any(field != reference for field in column[1:]):
	            return column
	    return None # i.e. all compared chunks are the same


	def main_function(input_files, output_file, clip_hot_pixels=True):
	    """
	    Merge 2D ASCII frames into one 3D array and save it in the format selected by the output extension.

	    Parameters:
	        input_files: paths of the text files, each holding one 2D frame. They are sorted by the numbers
	            embedded in their names; the name chunk that differs among them is used as the energy
	            coordinate. At least two files are needed (the chunk extraction asserts this).
	        output_file: output path; its extension selects the format ('.npz', '.h5', '.hdf' or '.vtr').
	        clip_hot_pixels: if True, all values are limited to the range from 0 to 3x the 0.999 quantile
	            (the quantile is estimated from every third sample along each axis).

	    Raises:
	        RuntimeError: if the output extension is not supported (checked before any file is read).
	        ValueError: if no input files are given, or if their names do not differ in any compared chunk.
	    """
	    ## Local logger, so that the function does not depend on a global that exists only when run as a script
	    log = logging.getLogger(__name__)

	    ## Check the requested format first, so that a wrong extension fails before the frames are loaded
	    if not output_file.endswith(('.npz', '.h5', '.hdf', '.vtr')):
	        raise RuntimeError('output extension can be *.npz, *.h5, *.hdf or *.vtr')

	    sorted_input_files = sorted(input_files, key=split_alpha_numeric) # sort by embedded numbers
	    if not sorted_input_files:
	        raise ValueError('no input files given')

	    energy_strings = extract_stringpart_that_differs(sorted_input_files)
	    if energy_strings is None:
	        raise ValueError('input file names do not differ, cannot determine the energy axis')

	    frames3d = None
	    for n, file in enumerate(sorted_input_files):
	        frame = np.loadtxt(file, ndmin=2)   # ndmin=2 keeps a single-row file two-dimensional
	        if frames3d is None:
	            frames3d = np.empty((len(sorted_input_files),) + frame.shape)
	        frames3d[n] = frame
	        # Note the frames are stored from index 0, even if their respective energies start from nonzero values

	    axes_names = ('Energy_keV', 'Y_pixel', 'X_pixel')
	    axes_coordinates = [
	        [float(Estring) for Estring in energy_strings],
	        np.arange(frames3d.shape[1]),
	        np.arange(frames3d.shape[2]),
	    ]

	    if clip_hot_pixels:
	        clip_quant, clip_factor = .999, 3
	        log.debug(f'Optional: clipping hot pixels (all with values over {clip_factor}× quantile {clip_quant:.3f} )')
	        clip_val = clip_factor * np.nanquantile(frames3d[::3,::3,::3].astype(np.float32), q=clip_quant)
	        frames3d = np.clip(frames3d, 0, clip_val)

	    log.debug(f'Saving output_file {output_file}')
	    if output_file.endswith('.npz'):
	        # The following generates a NPZ file with easy to read fields like:
	        #     frames___49x256x256
	        #     frames___axis0___Energy_keV
	        #     frames___axis1___Y_pixel
	        #     frames___axis2___X_pixel
	        npz_output_dict = {('frames___' + 'x'.join(str(s) for s in frames3d.shape)):frames3d}
	        for n, (axis_name, axis_coordinates) in enumerate(zip(axes_names, axes_coordinates)):
	            npz_output_dict[f'frames___axis{n}___{axis_name}'] = axis_coordinates
	        np.savez_compressed(output_file, **npz_output_dict)
	    elif output_file.endswith(('.h5', '.hdf')):
	        import h5py
	        with h5py.File(output_file, 'w') as f:
	            f.create_dataset('frames', data=frames3d) # Save the array
	    else:
	        ## Only '.vtr' is left here; solution with https://pypi.org/project/pyevtk/
	        from pyevtk.hl import gridToVTK
	        z, y, x = (np.array(ax) for ax in axes_coordinates)
	        ## gridToVTK expects the path without extension and appends '.vtr' itself
	        gridToVTK(output_file.removesuffix('.vtr'), z, y, x, pointData={'frames': frames3d})

	    log.debug("done.")


	if __name__ == '__main__':
	    ## Command-line arguments should roughly match arguments of the main function
	    parser = argparse.ArgumentParser(prog = ' ', description = __doc__)
	    pg_general = parser.add_argument_group('General options')
	    pg_general.add_argument('input_files', type=str, nargs='+',
	                            help='Files to process')
	    ## The extension selects the output format, so the default has to carry one that main_function accepts
	    pg_general.add_argument('-o', '--output_file', type=str, default='merged_frames.npz',
	                            help='output path, its extension (.npz, .h5, .hdf or .vtr) selects the format; '
	                                 'defaults to merged_frames.npz')
	    ## `choices` makes argparse reject other levels instead of failing on the dictionary lookup below
	    pg_general.add_argument('-v', '--verbose', type=int, default=1, choices=(0, 1, 2),
	                            help='defaults to 1 (INFO messages), can be set to 0 (warnings only) or 2 (DEBUG)')
	    #pg_general.add_argument('-b', '--binary_option', action='store_true') # i.e. defaults to False
	    args = parser.parse_args()

	    ## Preprocess the user arguments
	    logging.basicConfig(level={0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}[args.verbose])
	    log = logging.getLogger(__name__)   # module-level logger, main_function may refer to it

	    main_function(args.input_files, args.output_file)
No results found