Created
May 20, 2020 09:13
-
-
Save normanrz/dcf3e37e4dc95c330ea8e7432a2fe102 to your computer and use it in GitHub Desktop.
Merge webKnossos volume annotations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# VOLUME ANNOTATION MERGE | |
# | |
# This script merges multiple volume annotations from webKnossos. | |
# In case of overlapping annotations, the last annotation wins. | |
# | |
# The --relabel flag will relabel the segments of each annotation | |
# to be unique in the output annotation. Useful when multiple | |
# annotators created segments with id 1. | |
# | |
# 1. Download all annotations that you want to merge. | |
# You will get a .zip file for each annotation. | |
# 2. Install Python 3 (if you don't have it) | |
# 3. Install the dependencies of this script: | |
# pip install -U wkw | |
# 4. Run the script from the terminal | |
# python merge_volume.py volume0.zip volume1.zip volume2.zip | |
# OR | |
# python merge_volume.py --relabel volume0.zip volume1.zip volume2.zip | |
# 5. The script will output a out.zip file that | |
# you can upload to webKnossos. | |
# | |
# License: MIT, scalable minds | |
import wkw | |
from zipfile import ZipFile | |
import sys | |
from glob import iglob | |
from uuid import uuid4 | |
import os | |
import re | |
import numpy as np | |
from shutil import rmtree, copyfile | |
from argparse import ArgumentParser | |
# Consts
path = os.path  # short alias used for all path joins below
BUCKET_SIZE = 32  # wkw bucket edge length in voxels (buckets are 32^3)
out_folder = str(uuid4())  # unique scratch folder for the merged dataset

# Prelude: command-line interface
parser = ArgumentParser(description="Merge webKnossos volume annotations")
parser.add_argument(
    "--relabel",
    action="store_true",
    help="Relabel all segments with a new unique id in order to avoid label collisions.",
)
parser.add_argument("zip_files", nargs="+", help="Volume annotation files (.zip)")
args = parser.parse_args()

# Defensive check: argparse already rejects an empty list for nargs="+",
# but if we ever get here, exit with a non-zero status (0 would signal
# success to the shell even though nothing was merged).
if len(args.zip_files) == 0:
    print("Please supply volume annotations as downloaded from webKnossos")
    sys.exit(1)

print("Merging {} annotations: {}".format(len(args.zip_files), args.zip_files))
# Unzip all annotation zips, one scratch folder per input archive.
folder_names = [str(uuid4()) for _ in args.zip_files]
for archive, target in zip(args.zip_files, folder_names):
    os.makedirs(target, exist_ok=True)
    # The downloaded annotation zip wraps a nested data.zip holding the
    # actual wkw volume data; extract both into the same scratch folder.
    with ZipFile(archive, "r") as outer:
        outer.extractall(target)
    with ZipFile(path.join(target, "data.zip"), "r") as inner:
        inner.extractall(target)
print("Unpacked all volume annotations")
# Create output WKW dataset: uint32 labels, one file per cube, LZ4HC-compressed.
out_header = wkw.Header(
    voxel_type=np.uint32,
    file_len=1,
    block_type=wkw.Header.BLOCK_TYPE_LZ4HC,
)
out_ds = wkw.Dataset.open(path.join(out_folder, "1"), out_header)
# Get buckets of all annotations.
# Matches .../z<z>/y<y>/x<x>.wkw; accepts either path separator so that
# paths produced by os.path.join work on Windows as well as POSIX.
CUBE_REGEX = re.compile(r"z(\d+)[/\\]y(\d+)[/\\]x(\d+)\.wkw$")


def list_buckets(layer_path):
    """Return the set of (x, y, z) bucket coordinates stored under layer_path.

    Scans for wkw cube files laid out as z<z>/y<y>/x<x>.wkw; files that do
    not match that naming scheme are ignored.
    """
    output = set()
    for filename in iglob(path.join(layer_path, "*", "*", "*.wkw")):
        m = CUBE_REGEX.search(filename)
        if m is not None:
            # Regex groups are (z, y, x); emit (x, y, z).
            output.add((int(m.group(3)), int(m.group(2)), int(m.group(1))))
    return output
# One set of bucket coordinates per unpacked annotation, in input order.
bucket_lists = [list_buckets(path.join(name, "1")) for name in folder_names]
# Collect all unique labels.
# One set of non-zero segment ids per annotation, aligned with folder_names.
label_sets = []
for buckets, folder_name in zip(bucket_lists, folder_names):
    with wkw.Dataset.open(path.join(folder_name, "1")) as in_ds:
        label_set = set()
        for (x, y, z) in buckets:
            # Bucket coordinates -> voxel offset of the 32^3 cube.
            offset = (x * BUCKET_SIZE, y * BUCKET_SIZE, z * BUCKET_SIZE)
            size = (BUCKET_SIZE, BUCKET_SIZE, BUCKET_SIZE)
            in_block = in_ds.read(offset, size)[0]
            # np.unique deduplicates in C; iterating 32^3 raw voxels into a
            # Python set element-by-element is far slower for the same result.
            label_set.update(np.unique(in_block[in_block != 0]))
    label_sets.append(label_set)
# Build per-annotation label maps: old id -> id written to the output.
label_maps = []
if args.relabel:
    # Assign globally unique ids across all annotations, starting at 1,
    # so segments from different annotators cannot collide.
    next_id = 1
    for label_set in label_sets:
        mapping = {}
        for label in label_set:
            mapping[label] = next_id
            next_id += 1
        label_maps.append(mapping)
    print("Relabelling {} unique labels".format(sum(len(m) for m in label_maps)))
else:
    # Identity mapping: every label keeps its original id.
    for label_set in label_sets:
        label_maps.append({label: label for label in label_set})
    print(
        "Found {} labels, not relabelling".format(
            sum(len(m) for m in label_maps)
        )
    )
# Merge conflicting buckets (write all non-zero items, last write wins)
for buckets, folder_name, label_map in zip(bucket_lists, folder_names, label_maps):
    with wkw.Dataset.open(path.join(folder_name, "1")) as in_ds:
        for (x, y, z) in buckets:
            # Bucket coordinates -> voxel offset of the 32^3 cube.
            offset = (x * BUCKET_SIZE, y * BUCKET_SIZE, z * BUCKET_SIZE)
            size = (BUCKET_SIZE, BUCKET_SIZE, BUCKET_SIZE)
            # Read what has been merged so far, then stamp this annotation's
            # labelled voxels on top; zeros in the source leave the previously
            # merged data untouched, so later annotations win only where they
            # actually annotated.
            merged = out_ds.read(offset, size)[0]
            source = in_ds.read(offset, size)[0]
            for old_label, new_label in label_map.items():
                merged[source == old_label] = new_label
            out_ds.write(offset, merged)
print("Merged all data")
# Create zip file.
# Reuse the skeleton .nml from the last annotation for the merged upload.
nml_file = next(iglob(path.join(folder_names[-1], "*.nml")))
with ZipFile("data.zip", "w") as zip_ref:
    for root, _dirs, files in os.walk(path.join(out_folder, "1")):
        for file in files:
            full_path = path.join(root, file)
            # Store members relative to the scratch folder so the archive
            # roots at "1/z.../y.../x....wkw" — the same layout as the
            # downloaded data.zip — instead of leaking the random
            # scratch-folder name into the archive paths.
            zip_ref.write(full_path, arcname=path.relpath(full_path, out_folder))
with ZipFile("out.zip", "w") as zip_ref:
    zip_ref.write("data.zip")
    zip_ref.write(nml_file, arcname=path.basename(nml_file))
# Cleanup: remove all scratch folders and the intermediate data.zip.
for scratch in folder_names + [out_folder]:
    rmtree(scratch)
os.unlink("data.zip")

# Done
print("Created out.zip")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment