Skip to content

Instantly share code, notes, and snippets.

@PIPIPIG233666
Last active June 17, 2025 18:20
Show Gist options
  • Save PIPIPIG233666/e260dce0d1505c784f411585e515762d to your computer and use it in GitHub Desktop.
Save PIPIPIG233666/e260dce0d1505c784f411585e515762d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import glob
import os
import tarfile
from multiprocessing import cpu_count, Pool
from tqdm import tqdm
def extract_tgz(path):
"""
Extract a single .tgz archive into a folder named after the archive
(without its .tgz extension), using an explicit filter to avoid
the Python-3.14 deprecation warning.
"""
out_dir = os.path.splitext(path)[0]
os.makedirs(out_dir, exist_ok=True)
with tarfile.open(path, "r:gz") as tar:
# the filter=lambda ti: ti simply returns each TarInfo unchanged
tar.extractall(path=out_dir, filter=lambda ti: ti)
return path
def main():
    """Parse the command line and extract every matching archive in parallel.

    Command-line arguments:
        pattern: Optional glob pattern selecting the archives
            (default ``*.tgz``).
        -j / --jobs: Number of worker processes (default: the value of
            ``cpu_count()``).  Must be at least 1.

    When the pattern matches nothing, a message is printed and the function
    returns without starting any workers.  Otherwise every matched file is
    handed to ``extract_tgz`` through a process pool while a tqdm progress
    bar counts finished archives.  An exception raised while extracting an
    archive is re-raised here when its result is consumed.
    """
    parser = argparse.ArgumentParser(
        description="Extract multiple .tgz files in parallel with a progress bar."
    )
    parser.add_argument(
        "pattern",
        nargs="?",
        default="*.tgz",
        help="glob pattern for .tgz files (default: '*.tgz')",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=cpu_count(),
        help="number of parallel workers (default: number of CPU cores)",
    )
    args = parser.parse_args()

    if args.jobs < 1:
        # Pool() rejects a non-positive process count with a bare ValueError;
        # report it as an ordinary usage error (exit status 2) instead.
        parser.error("--jobs must be at least 1")

    files = sorted(glob.glob(args.pattern))
    if not files:
        print(f"No files matched pattern {args.pattern!r}")
        return

    # Never start more worker processes than there are archives to extract.
    workers = min(args.jobs, len(files))
    with Pool(processes=workers) as pool:
        finished = tqdm(
            pool.imap_unordered(extract_tgz, files),
            total=len(files),
            desc="Extracting",
            unit="archive",
        )
        # Results are consumed only to drive the progress bar.
        for _ in finished:
            pass
# Run the CLI only when executed as a script: importing this module
# (e.g. from a multiprocessing worker started with "spawn") must not
# re-run main().
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment