grenkoca · April 28, 2025 16:22
diff --git a/parse_qptiff_marker_names.py b/parse_qptiff_marker_names.py
 import xml.etree.ElementTree as ET
 from tifffile import TiffFile
 import os

 def extract_qptiff_biomarkers(file_path):
    """
    Extract biomarker and fluorophore names from a QPTIFF file.

    Every page of the first series that has a non-empty description is
    parsed as XML. Pages with an empty description are skipped and add no
    entry to either list.

    Parameters:
    -----------
    file_path : str
        Path to the QPTIFF file

    Returns:
    --------
    tuple
        ``(biomarkers, fluorophores)``: two lists of equal length with one
        entry per parsed page, so index ``i`` of both lists refers to the
        same page. A name that cannot be read is replaced by
        ``"Channel_<page number>"`` (1-based).

    Raises:
    -------
    FileNotFoundError
        If ``file_path`` does not exist.
    """

    # Check if file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    # Candidate element paths for the biomarker name, tried in order; the
    # first element with non-empty text wins.
    marker_paths = (
        './/Biomarker',
        './/BioMarker',
        './/BioMarker/Name',
        './/Biomarker/Name',
        './/StainName',
        './/Marker',
        './/ProteinMarker',
    )

    biomarkers = []
    fluorophores = []

    # Open the QPTIFF file using tifffile
    with TiffFile(file_path) as tif:
        for page_idx, page in enumerate(tif.series[0].pages):
            if not page.description:
                continue

            # Placeholder used whenever a name cannot be read from the XML.
            fallback = f"Channel_{page_idx + 1}"
            fluorophore = fallback
            biomarker = fallback

            try:
                # Parse XML from the description
                root = ET.fromstring(page.description)

                # Extract fluorophore name for reference
                name_element = root.find('.//Name')
                if name_element is not None and name_element.text:
                    fluorophore = name_element.text

                # If no biomarker element is found, the fluorophore name
                # doubles as the biomarker name.
                biomarker = fluorophore
                for path in marker_paths:
                    element = root.find(path)
                    if element is not None and element.text:
                        biomarker = element.text
                        break
            except ET.ParseError:
                # Description is not valid XML: keep the placeholder.
                biomarker = fallback
            except Exception as e:
                print(f"Error parsing page {page_idx}: {str(e)}")
                biomarker = fallback

            # Append to both lists together so they always stay
            # index-aligned, even when parsing failed for this page.
            biomarkers.append(biomarker)
            fluorophores.append(fluorophore)

    return biomarkers, fluorophores

 def print_channel_info(biomarkers, fluorophores):
    """Print a numbered table pairing each biomarker with its fluorophore.

    The first line reports ``len(biomarkers)``; the table rows are built by
    zipping the two lists and are numbered starting from 1.
    """
    separator = "-" * 40
    header_lines = (
        f"Found {len(biomarkers)} biomarkers:",
        separator,
        f"{'#':<3} {'Biomarker':<20} {'Fluorophore':<15}",
        separator,
    )
    for line in header_lines:
        print(line)

    rows = zip(biomarkers, fluorophores)
    for position, (marker_name, dye_name) in enumerate(rows, start=1):
        print(f"{position:<3} {marker_name:<20} {dye_name:<15}")

 # Example usage: runs only when the file is executed as a script.
 if __name__ == "__main__":
    # Point this at a real QPTIFF file before running.
    qptiff_file = "path/to/your/file.qptiff"

    try:
        extraction_result = extract_qptiff_biomarkers(qptiff_file)
        biomarkers, fluorophores = extraction_result
        print_channel_info(biomarkers, fluorophores)

        # Also show the bare biomarker list after the table.
        print("\nBiomarker list:", biomarkers, sep="\n")
    except Exception as e:
        # Any failure above is reported as a single line instead of a traceback.
        print(f"Error: {str(e)}")
	import xml.etree.ElementTree as ET
	from tifffile import TiffFile
	import os

	def extract_qptiff_biomarkers(file_path):
	    """
	    Extract biomarker and fluorophore names from a QPTIFF file.

	    Every page of the first series that has a non-empty description is
	    parsed as XML. Pages with an empty description are skipped and add no
	    entry to either list.

	    Parameters:
	    -----------
	    file_path : str
	        Path to the QPTIFF file

	    Returns:
	    --------
	    tuple
	        ``(biomarkers, fluorophores)``: two lists of equal length with one
	        entry per parsed page, so index ``i`` of both lists refers to the
	        same page. A name that cannot be read is replaced by
	        ``"Channel_<page number>"`` (1-based).

	    Raises:
	    -------
	    FileNotFoundError
	        If ``file_path`` does not exist.
	    """

	    # Check if file exists
	    if not os.path.exists(file_path):
	        raise FileNotFoundError(f"File not found: {file_path}")

	    # Candidate element paths for the biomarker name, tried in order; the
	    # first element with non-empty text wins.
	    marker_paths = (
	        './/Biomarker',
	        './/BioMarker',
	        './/BioMarker/Name',
	        './/Biomarker/Name',
	        './/StainName',
	        './/Marker',
	        './/ProteinMarker',
	    )

	    biomarkers = []
	    fluorophores = []

	    # Open the QPTIFF file using tifffile
	    with TiffFile(file_path) as tif:
	        for page_idx, page in enumerate(tif.series[0].pages):
	            if not page.description:
	                continue

	            # Placeholder used whenever a name cannot be read from the XML.
	            fallback = f"Channel_{page_idx + 1}"
	            fluorophore = fallback
	            biomarker = fallback

	            try:
	                # Parse XML from the description
	                root = ET.fromstring(page.description)

	                # Extract fluorophore name for reference
	                name_element = root.find('.//Name')
	                if name_element is not None and name_element.text:
	                    fluorophore = name_element.text

	                # If no biomarker element is found, the fluorophore name
	                # doubles as the biomarker name.
	                biomarker = fluorophore
	                for path in marker_paths:
	                    element = root.find(path)
	                    if element is not None and element.text:
	                        biomarker = element.text
	                        break
	            except ET.ParseError:
	                # Description is not valid XML: keep the placeholder.
	                biomarker = fallback
	            except Exception as e:
	                print(f"Error parsing page {page_idx}: {str(e)}")
	                biomarker = fallback

	            # Append to both lists together so they always stay
	            # index-aligned, even when parsing failed for this page.
	            biomarkers.append(biomarker)
	            fluorophores.append(fluorophore)

	    return biomarkers, fluorophores

	def print_channel_info(biomarkers, fluorophores):
	    """Print biomarkers and their corresponding fluorophores.

	    Writes a header line with ``len(biomarkers)``, then a table with one
	    numbered row (starting at 1) per ``(biomarker, fluorophore)`` pair
	    obtained by zipping the two lists.
	    """
	    print(f"Found {len(biomarkers)} biomarkers:")
	    print("-" * 40)
	    print(f"{'#':<3} {'Biomarker':<20} {'Fluorophore':<15}")
	    print("-" * 40)

	    for i, (marker, fluorophore) in enumerate(zip(biomarkers, fluorophores), 1):
	        print(f"{i:<3} {marker:<20} {fluorophore:<15}")

	# Example usage: runs only when the file is executed as a script.
	if __name__ == "__main__":
	    # Replace with your QPTIFF file path
	    qptiff_file = "path/to/your/file.qptiff"

	    try:
	        biomarkers, fluorophores = extract_qptiff_biomarkers(qptiff_file)
	        print_channel_info(biomarkers, fluorophores)

	        # Also show the bare biomarker list after the table.
	        print("\nBiomarker list:")
	        print(biomarkers)
	    except Exception as e:
	        # Any failure above is reported as a single line instead of a traceback.
	        print(f"Error: {str(e)}")
No results found