Created
May 31, 2021 10:51
-
-
Save Vesihiisi/33c94345dde142412955531aef3fbfcf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
"""One trick script to | |
collate sets of TIFF files across multiple
directories into smaller DjVu files.
Used for upload of manuscripts
from Musikverket 2021-05.
""" | |
import os | |
from shutil import which # used for djvu conversion | |
from subprocess import run # used for djvu conversion | |
from tqdm import tqdm | |
def list_subdirs(path):
    """Return the paths of the immediate subdirectories of ``path``.

    Only direct children are considered (no recursion); regular files
    are skipped. Each returned entry is ``path`` joined with the
    subdirectory name, and the list is sorted so that callers process
    directories in a reproducible order regardless of the order in
    which the filesystem yields them.

    Args:
        path: Directory to inspect.

    Returns:
        Sorted list of subdirectory paths (empty if there are none).
    """
    # Use the iterator as a context manager so the underlying directory
    # handle is released even if is_dir() raises part-way through.
    with os.scandir(path) as entries:
        return sorted(entry.path for entry in entries if entry.is_dir())
def can_djvu():
    """
    Tell whether the tools needed to build DjVu files are available.

    Both ``djvm`` and ``c44`` must be resolvable through ``which``,
    i.e. found on PATH as executables.
    """
    required_tools = ('djvm', 'c44')
    # all() stops at the first missing tool, so later lookups are skipped.
    return all(which(tool) is not None for tool in required_tools)
def _remove_if_exists(file_path):
    """Delete ``file_path``, ignoring the case where it does not exist."""
    try:
        os.remove(file_path)
    except FileNotFoundError:
        pass


def create_work_djvu(path):
    """Collate the TIFF pages in ``path`` into one multi-page DjVu file.

    Pages are processed in sorted filename order. Each page is converted
    to a temporary JPEG with ``convert``, encoded to a temporary
    single-page DjVu with ``c44`` and then added to the book with
    ``djvm``. The book is written relative to the current working
    directory and named ``<parent dir>---<dir>.djvu``.

    Args:
        path: Directory holding the TIFF pages of one work. Files ending
            in ``.tif`` or ``.tiff`` (any letter case) are used.

    Raises:
        subprocess.CalledProcessError: If one of the external tools
            exits with a non-zero status (all calls use ``check=True``).
    """
    # Normalise once so a trailing separator cannot make the parent
    # name and the directory name come out as the same component.
    path = os.path.normpath(path)
    parent_name = os.path.basename(os.path.dirname(path))
    tmp_djvu = os.path.join(path, "tmp.djvu")
    book_djvu = parent_name + "---" + os.path.basename(path) + ".djvu"
    files_to_process = sorted(
        name for name in os.listdir(path)
        if name.lower().endswith((".tif", ".tiff"))
    )
    if not files_to_process:
        # Nothing to collate: do not claim that a DjVu was completed.
        print("====== Skipping {} (no TIFF pages) ======".format(book_djvu))
        return
    print("====== Starting {} ======".format(book_djvu))
    for i, page in tqdm(enumerate(files_to_process, 1),
                        total=len(files_to_process)):
        tmp_jpg = os.path.join(path, "{}.jpg".format(page))
        try:
            run(['convert', os.path.join(path, page), tmp_jpg], check=True)
            run(['c44', '-crcbfull', tmp_jpg, tmp_djvu], check=True)
            # The first page creates the book; later pages are inserted
            # into the existing one.
            djvm_mode = '-c' if i == 1 else '-i'
            run(['djvm', djvm_mode, book_djvu, tmp_djvu], check=True)
        finally:
            # Clean up the intermediates even when a tool fails, so no
            # stray tmp files are left next to the source TIFFs.
            _remove_if_exists(tmp_jpg)
            _remove_if_exists(tmp_djvu)
    print("====== Completed {} ======".format(book_djvu))
def main():
    """Build one DjVu per second-level directory below the working dir.

    Top-level directories whose lower-cased path contains any of the
    exclusion markers are skipped; for every remaining one, each of its
    subdirectories is passed to ``create_work_djvu``.
    """
    # irrelevant directories
    skip_markers = ("system volume information", "$", "mediaexplorer",
                    "recycle", "trash", "!")
    for top_dir in sorted(list_subdirs('.')):
        lowered = top_dir.lower()
        is_irrelevant = False
        for marker in skip_markers:
            if marker in lowered:
                is_irrelevant = True
                break
        if not is_irrelevant:
            for work_dir in list_subdirs(top_dir):
                create_work_djvu(work_dir)
# Script entry point: only start the batch when the DjVu tools exist.
if __name__ == "__main__":
    if can_djvu():
        main()
    else:
        raise Exception('Djvu utils djvm and c44 not found.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment