Last active
June 17, 2025 18:20
-
-
Save PIPIPIG233666/e260dce0d1505c784f411585e515762d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import glob | |
import os | |
import tarfile | |
from multiprocessing import cpu_count, Pool | |
from tqdm import tqdm | |
def extract_tgz(path): | |
""" | |
Extract a single .tgz archive into a folder named after the archive | |
(without its .tgz extension), using an explicit filter to avoid | |
the Python-3.14 deprecation warning. | |
""" | |
out_dir = os.path.splitext(path)[0] | |
os.makedirs(out_dir, exist_ok=True) | |
with tarfile.open(path, "r:gz") as tar: | |
# the filter=lambda ti: ti simply returns each TarInfo unchanged | |
tar.extractall(path=out_dir, filter=lambda ti: ti) | |
return path | |
def main():
    """Parse the command line and extract every matching archive in parallel.

    Command-line arguments:
        pattern: Optional glob pattern selecting the archives
            (default ``*.tgz``).
        -j / --jobs: Number of worker processes (default: the CPU count
            reported by ``multiprocessing.cpu_count()``). Must be >= 1.

    Matching archives are sorted, then handed to a process pool that runs
    ``extract_tgz`` on each one while a tqdm progress bar counts completed
    archives. If nothing matches, a message is printed and the function
    returns without starting any workers.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            ``--jobs`` value below 1.
    """
    parser = argparse.ArgumentParser(
        description="Extract multiple .tgz files in parallel with a progress bar."
    )
    parser.add_argument(
        "pattern",
        nargs="?",
        default="*.tgz",
        help="glob pattern for .tgz files (default: '*.tgz')",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=cpu_count(),
        help="number of parallel workers (default: number of CPU cores)",
    )
    args = parser.parse_args()

    # Pool(processes=0) or a negative count raises a bare ValueError; report
    # it as an ordinary usage error instead of a traceback.
    if args.jobs < 1:
        parser.error(f"--jobs must be at least 1 (got {args.jobs})")

    files = sorted(glob.glob(args.pattern))
    if not files:
        print(f"No files matched pattern {args.pattern!r}")
        return

    # Never start more worker processes than there are archives to extract.
    workers = min(args.jobs, len(files))
    with Pool(processes=workers) as pool:
        # imap_unordered yields each result as soon as its worker finishes,
        # so the bar advances per completed archive rather than in input
        # order. The results themselves are not needed here.
        for _ in tqdm(
            pool.imap_unordered(extract_tgz, files),
            total=len(files),
            desc="Extracting",
            unit="archive",
        ):
            pass
# Run the CLI only when executed as a script, not when imported (which also
# keeps multiprocessing workers from re-running main on spawn-based platforms).
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment