Skip to content

Instantly share code, notes, and snippets.

@pombredanne
Last active January 16, 2025 16:50
Show Gist options
  • Save pombredanne/379175620349e471423d5b01ae0fb56b to your computer and use it in GitHub Desktop.
Save pombredanne/379175620349e471423d5b01ae0fb56b to your computer and use it in GitHub Desktop.
Wipe out certain layers of a saved Docker image, replacing some of the layer.tar tarballs with empty tarballs
c1c323164163613a794a928a61f4531429f89f88958e0e1beae56bb4a7c31611
0ae5d02809c975f02bf6c7fd0059f350edcf59345d600c553f84018f8883bc20
ddb4dde553b474b7d048c0fed4711a8ad087c39598668ef6c258797a143e2443
0a0b4a377b2e34f18f144936b4f7e2e07810454ba402979561388964f969df90
5c6a155523dc77a3f9a4f59d5139e2f53bb8de458b556d2f9c1f17ef10f546ec
f9137a022811d217b0f63e361ed194826097b0dfdd84604ffb5594e8c61c8e66
1e937d2a0ffbf2b8b761f21153adcab8bc7609bf8be41cbbd7b442426e7edc05
7fd6483e5d078e831e7194a9193205baa4a464c705cb65ed65f633a02dd83e30
0fc731e19b6385738bdc09c306570c4a27d7a7b2ef1d15e73421ffe184779af2
1f25bcf1de2b925edb8c1176b2f72cc9390fcce78606734313e256cbb3c8c3b5
68a5b1893c20e1ca5ddb1d57094a71c69f987eb1f390d6c31233d07983ae275a
62aad6f1d71cf2a3dacb7134926fd9ba535a1d85ae1ea9887032029b9fd8308b
78383d00813cea8fa081ea7ef0b35afc8ad3f3e067ca2429c5b6a8f569f47ed8
5f4aca875e4e226cf97a46c6360cd6bbfe2308bbb03464667fa60481ddb547e8
a7f1467e5045d6db8578d3fd943422d1c380627d5f65cd5b8804a773f56ebcb6
24b772a0f58196c630cc149e38135492feaa8986adc0ccdd2900745f558cfa05
69ec26a931992d960e6d48348f03e3f83357047ae6e9c99db536d4495b8e66ea
9a456faf99407524695ac0b6afdbba01df6796b8da5ea64c37754577dc43d711
f3b09b2106fb9bf56cd92cdaa8b22eb72a8663c966547b571f2a12bcc18658d5
efcf9dfc2c9281ce7175a29e3e85aa5dfa4373ed54a78ec6c4ad0ff246e10b34
583f745eb08c5eb3ffa2af517dfd94bed4036452fcaa8439e809b01dab68b133
74a21c124f278ee935beaf76367bd322f654ff1664ab6b1967b20c7ee9c4c77e
381fb0c9ae053468d6151b79ff4567db67620774cdd49dbd9e8dad5814fa0780
0d96703cb382bc3c275f2a1ac7f2978d9aa43888c81a41b2735ba47cf29340be
9ba7c4dd37ebe2782ce24de32d5b8742c2fb4d6eb480d3b05d608c93e5395448
0571e1844f5fdc2f3b3ead0e50a8b02fff3a8b9ba5a2519af5429a61d437d682
7dec2e0cc5fdfd9ee8b930867d69d464d3cd6cf54d886d7ac04d8f5a7d3da4bb
88696f1bc0eb49b94cc1b171feb970e034df93605a1fda37e6791cd698597f05
8594935dbdbeb883025a4071ef2c1928bcaa1004e04442cab5de094807df1621
be5b221f54dd42c7c447b65f8db4bec28387a4872d274dae697efee5aa0a42f4
2a64c2429916747a1d343a38d66f635df35763f3ee11d5af9eafcb39081adc59
743f624de26f6445f12d8ced13b19446495462b358fe0e64590e0f55f50899de
07c6eb22fe37cf0e359ab84cfd97d6bb98633f3065f773e659217650d8dc31bb
46541421ecb4bbe5283d66d1b2188711f8c4e9eb3794082be239ed20e47f80b7
9b67ca1715ef63f3c3ca289bf55a7906059d357d9be0aa3994aa6b9b7db783c8
b024fdd8bdda5857dd516768cec0e23cd24ea7d238793040bfdf75a6ae452cc5
dce0a52f385b53e0d83288f9b2a3516f4dfe9437e8b191f702d7a5cb5ce2fe0c
4d58cc81fdbea0d903f37e32436b29a9729fb0d5f9facdd7d1a79286fd251e4a
5c9ba5572bfa0d4cd1adee0a68359da1dd4aa451725cd83163f094ec2aaa2113
3e61df78332e0a9e825f30ae85bfdd08c57a3d00afd440b45b5d4c09c4371453
606bd52d6625e8759a5ad562a701eb8af06065cb540552f40e95aa7966c6646e
f53773405e14ed9dc921e6a7411522e3a63e7178f8c985654b0227f1e2a0c577
d156e71640f849ba11a3f78197dd85a213ac77dcc165b61b059d4e95c850605d
1ee2d815d32c8e7659e3d7714cb2c788d19fd92b803dba4e5bcfc70fc95e584b
69702adea89137f6050a38e81448bb6f798ab3e9b5acefcdf2a6d34abf81c6aa
48ff9bdcd8c7a1ebfa2e4d48bf04526a10fcf07de4c9cf2f0e8995d7426cd14b
aca51c209b2a8255849042859a3a9e86155c5b955aa8a9de233e55a4b654871d
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org for support or download.
# See https://aboutcode.org for more information about AboutCode OSS projects.
#
import argparse
import hashlib
import tarfile
import shutil
from pathlib import Path
from dataclasses import dataclass
@dataclass
class Layer:
    """
    One layer of an extracted image, backed by a ``layer.tar`` file on disk.
    """

    # Location of the layer tarball.
    path: Path
    # Name of the directory that directly contains the tarball.
    layer_id: str
    # SHA-256 hex digest of the tarball as it was when the Layer was created.
    old_sha256: str
    # SHA-256 hex digest of the tarball after emptying; "" until then.
    new_sha256: str = ""
    # NOTE(review): presumably the position of the layer in the image; it is
    # not assigned by any of the code visible here.
    layer_number: int = 0

    @classmethod
    def from_tar(cls, layer_tar):
        """
        Return a Layer built from ``layer_tar``, the path to a layer tarball.
        The layer id is the name of the tarball's parent directory.
        """
        parent_dir_name = layer_tar.parent.name
        digest = sha2(layer_tar)
        return cls(path=layer_tar, layer_id=parent_dir_name, old_sha256=digest)

    def empty_tarball(self):
        """
        Overwrite the layer tarball with an archive that has no members and
        store the digest of the result in ``new_sha256``.
        """
        # Opening for writing and closing right away yields an empty tar.
        tarfile.open(self.path, "w").close()
        self.new_sha256 = sha2(self.path)
def sha2(path, chunk_size=1024 * 1024):
    """
    Return the SHA-256 hex digest of the file at ``path``.

    The file is read in blocks of ``chunk_size`` bytes rather than in one
    go, so that very large layer tarballs do not need to fit in memory.
    ``path`` may be a ``pathlib.Path`` or a plain string.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def make_slim(image_tarball, layer_ids_to_empty):
    """
    Make an image slimmer, emptying the tarball of some layers.

    ``image_tarball`` is the path (``str`` or ``Path``) to an image tarball.
    ``layer_ids_to_empty`` is an iterable of layer ids, where a layer id is
    the name of the directory that contains a ``layer.tar``; a single
    whitespace-separated string of ids is accepted too.

    The image is unpacked in an "extracted" directory under the current
    directory (deleted first if it exists). The selected ``layer.tar`` files
    are replaced by empty tarballs, the config JSON and ``manifest.json`` are
    updated with the new checksums, and the result is archived as
    "<image stem>-slim.tar" in the current directory.

    Return the path of the new archive.
    Raise FileNotFoundError if the extracted image has no top-level config
    JSON file next to ``manifest.json``.
    """
    # Coerce once: both Path-only attributes (.stem) and shutil calls need it.
    image_tarball = Path(image_tarball)
    if isinstance(layer_ids_to_empty, str):
        layer_ids_to_empty = layer_ids_to_empty.split()
    ids_to_empty = set(layer_ids_to_empty)

    extracted = Path("extracted")
    print(f"Deleting previous {extracted!r}")
    shutil.rmtree(path=extracted, ignore_errors=True)
    extracted.mkdir(exist_ok=True)
    extracted = extracted.absolute()

    print(f"Extracting image to {extracted!r}")
    shutil.unpack_archive(image_tarball, extract_dir=extracted)

    # Locate the config before touching anything, to fail early and clearly.
    # Sorting makes the choice deterministic if several JSON files exist.
    config_candidates = sorted(
        p for p in extracted.glob("*.json") if p.name != "manifest.json"
    )
    if not config_candidates:
        raise FileNotFoundError(
            f"No image config JSON file found in {str(extracted)!r}: "
            f"is {str(image_tarball)!r} a 'docker save' image tarball?"
        )
    old_config_file = config_candidates[0]

    # process layers
    layers = [Layer.from_tar(layer_tar) for layer_tar in extracted.rglob("layer.tar")]
    to_empty = [layer for layer in layers if layer.layer_id in ids_to_empty]
    print("Emptying Layers:")
    for layer in to_empty:
        print(f" {layer!r}")
        layer.empty_tarball()

    # update config: swap the checksum of each emptied layer for its new one
    old_config_sha256 = sha2(old_config_file)
    config = old_config_file.read_text(encoding="utf-8")
    for layer in to_empty:
        config = config.replace(
            f"sha256:{layer.old_sha256}",
            f"sha256:{layer.new_sha256}",
        )
    old_config_file.write_text(config, encoding="utf-8")

    # the config file is named after its own checksum, so rename it
    new_config_sha256 = sha2(old_config_file)
    new_config_file = extracted / f"{new_config_sha256}.json"
    old_config_file.rename(new_config_file)

    # update manifest so that it points to the renamed config
    manifest_file = extracted / "manifest.json"
    manifest = manifest_file.read_text(encoding="utf-8")
    manifest = manifest.replace(old_config_sha256, new_config_sha256)
    manifest_file.write_text(manifest, encoding="utf-8")

    # recreate image tarball
    archive = shutil.make_archive(
        base_name=f"{image_tarball.stem}-slim",
        format="tar",
        root_dir=extracted,
        base_dir="",
    )
    return Path(archive)
def slimify():
    """
    Command-line entry point: parse the arguments, read the layer ids file
    and create the slim image with ``make_slim``.

    Exit with a usage error if the image tarball or the layer ids file does
    not exist; this is checked before any work starts.
    """
    description = """
    Make an image slim, replacing some layer tarballs by empty tarballs. The new image will have a "-slim" name suffix.
    """
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "-i",
        "--image",
        dest="image_tarball",
        type=Path,
        required=True,
        metavar="FILE",
        help="Path to an image tarball, exported using 'docker save'",
    )
    parser.add_argument(
        "-l",
        "--layer-ids-to-skip",
        dest="layers_file",
        type=Path,
        required=True,
        metavar="FILE",
        help="Path to a file with one layer id to skip per line. "
        "The layer id is the name of the directory that contains a 'layer.tar' tarball.",
    )
    args = parser.parse_args()
    image_tarball = args.image_tarball
    layers_file = args.layers_file

    # Validate inputs up front, before anything on disk is deleted or created.
    if not image_tarball.is_file():
        parser.error(f"image tarball not found: {image_tarball}")
    if not layers_file.is_file():
        parser.error(f"layer ids file not found: {layers_file}")

    # The archive is created as "<stem>-slim.tar" in the current directory.
    print(
        f"Slimifying {image_tarball!r} to '{image_tarball.stem}-slim.tar' "
        f"skipping layers in {layers_file!r}"
    )
    # split() without argument also drops blank lines and stray whitespace
    layer_ids = layers_file.read_text().split()
    make_slim(image_tarball=image_tarball, layer_ids_to_empty=layer_ids)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    slimify()
@pombredanne
Copy link
Author

pombredanne commented Jan 16, 2025

This script is designed for special use cases, like when trying to audit or scan very large docker images and you want to skip some layers, but still ensure that the image looks mostly OK.

You first need to docker save your image. This is designed ONLY for the docker save image format, not for OCI or any other format.

Then you need to list the ids of the layers you want to empty and wipe. These are the names of the parent folders that contain a layer.tar archive. List the layer ids, one per line, in a text file.

Finally, run the script with the command below. Be mindful that a new directory named "extracted" will be created in the current directory (and wiped clean first if it already exists) to hold the extracted content of the image:

python3 slim.py --image <path to your image.tar> --layer-ids-to-skip <path to your layer ids file>

For instance, using a large PyTorch image from NVIDIA (large as in 22GB):

# save a docker image
docker pull nvcr.io/nvidia/pytorch:24.01-py3
docker save  nvcr.io/nvidia/pytorch:24.01-py3 > pytorch-24.01py3.tar
# get the script and layers to skip
wget https://gist.githubusercontent.com/pombredanne/379175620349e471423d5b01ae0fb56b/raw/f7c50deb5c40fb279c8371dfb9b26bbc7a0758df/slim.py

# this list contains all the layers except the first one
wget https://gist.githubusercontent.com/pombredanne/379175620349e471423d5b01ae0fb56b/raw/491b96fe760f507219716075c65ea7472775a1f3/pytorch.layers.txt

# then run proper
python3 slim.py --image pytorch-24.01py3.tar --layer-ids-to-skip pytorch.layers.txt

The result will be a new pytorch-24.01py3-slim.tar image tarball that is just about 80MB.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment