Last active
January 16, 2025 16:50
-
-
Save pombredanne/379175620349e471423d5b01ae0fb56b to your computer and use it in GitHub Desktop.
Wipe out certain layers of a saved Docker image, replacing some of the layer.tar tarballs with an empty tar
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
c1c323164163613a794a928a61f4531429f89f88958e0e1beae56bb4a7c31611 | |
0ae5d02809c975f02bf6c7fd0059f350edcf59345d600c553f84018f8883bc20 | |
ddb4dde553b474b7d048c0fed4711a8ad087c39598668ef6c258797a143e2443 | |
0a0b4a377b2e34f18f144936b4f7e2e07810454ba402979561388964f969df90 | |
5c6a155523dc77a3f9a4f59d5139e2f53bb8de458b556d2f9c1f17ef10f546ec | |
f9137a022811d217b0f63e361ed194826097b0dfdd84604ffb5594e8c61c8e66 | |
1e937d2a0ffbf2b8b761f21153adcab8bc7609bf8be41cbbd7b442426e7edc05 | |
7fd6483e5d078e831e7194a9193205baa4a464c705cb65ed65f633a02dd83e30 | |
0fc731e19b6385738bdc09c306570c4a27d7a7b2ef1d15e73421ffe184779af2 | |
1f25bcf1de2b925edb8c1176b2f72cc9390fcce78606734313e256cbb3c8c3b5 | |
68a5b1893c20e1ca5ddb1d57094a71c69f987eb1f390d6c31233d07983ae275a | |
62aad6f1d71cf2a3dacb7134926fd9ba535a1d85ae1ea9887032029b9fd8308b | |
78383d00813cea8fa081ea7ef0b35afc8ad3f3e067ca2429c5b6a8f569f47ed8 | |
5f4aca875e4e226cf97a46c6360cd6bbfe2308bbb03464667fa60481ddb547e8 | |
a7f1467e5045d6db8578d3fd943422d1c380627d5f65cd5b8804a773f56ebcb6 | |
24b772a0f58196c630cc149e38135492feaa8986adc0ccdd2900745f558cfa05 | |
69ec26a931992d960e6d48348f03e3f83357047ae6e9c99db536d4495b8e66ea | |
9a456faf99407524695ac0b6afdbba01df6796b8da5ea64c37754577dc43d711 | |
f3b09b2106fb9bf56cd92cdaa8b22eb72a8663c966547b571f2a12bcc18658d5 | |
efcf9dfc2c9281ce7175a29e3e85aa5dfa4373ed54a78ec6c4ad0ff246e10b34 | |
583f745eb08c5eb3ffa2af517dfd94bed4036452fcaa8439e809b01dab68b133 | |
74a21c124f278ee935beaf76367bd322f654ff1664ab6b1967b20c7ee9c4c77e | |
381fb0c9ae053468d6151b79ff4567db67620774cdd49dbd9e8dad5814fa0780 | |
0d96703cb382bc3c275f2a1ac7f2978d9aa43888c81a41b2735ba47cf29340be | |
9ba7c4dd37ebe2782ce24de32d5b8742c2fb4d6eb480d3b05d608c93e5395448 | |
0571e1844f5fdc2f3b3ead0e50a8b02fff3a8b9ba5a2519af5429a61d437d682 | |
7dec2e0cc5fdfd9ee8b930867d69d464d3cd6cf54d886d7ac04d8f5a7d3da4bb | |
88696f1bc0eb49b94cc1b171feb970e034df93605a1fda37e6791cd698597f05 | |
8594935dbdbeb883025a4071ef2c1928bcaa1004e04442cab5de094807df1621 | |
be5b221f54dd42c7c447b65f8db4bec28387a4872d274dae697efee5aa0a42f4 | |
2a64c2429916747a1d343a38d66f635df35763f3ee11d5af9eafcb39081adc59 | |
743f624de26f6445f12d8ced13b19446495462b358fe0e64590e0f55f50899de | |
07c6eb22fe37cf0e359ab84cfd97d6bb98633f3065f773e659217650d8dc31bb | |
46541421ecb4bbe5283d66d1b2188711f8c4e9eb3794082be239ed20e47f80b7 | |
9b67ca1715ef63f3c3ca289bf55a7906059d357d9be0aa3994aa6b9b7db783c8 | |
b024fdd8bdda5857dd516768cec0e23cd24ea7d238793040bfdf75a6ae452cc5 | |
dce0a52f385b53e0d83288f9b2a3516f4dfe9437e8b191f702d7a5cb5ce2fe0c | |
4d58cc81fdbea0d903f37e32436b29a9729fb0d5f9facdd7d1a79286fd251e4a | |
5c9ba5572bfa0d4cd1adee0a68359da1dd4aa451725cd83163f094ec2aaa2113 | |
3e61df78332e0a9e825f30ae85bfdd08c57a3d00afd440b45b5d4c09c4371453 | |
606bd52d6625e8759a5ad562a701eb8af06065cb540552f40e95aa7966c6646e | |
f53773405e14ed9dc921e6a7411522e3a63e7178f8c985654b0227f1e2a0c577 | |
d156e71640f849ba11a3f78197dd85a213ac77dcc165b61b059d4e95c850605d | |
1ee2d815d32c8e7659e3d7714cb2c788d19fd92b803dba4e5bcfc70fc95e584b | |
69702adea89137f6050a38e81448bb6f798ab3e9b5acefcdf2a6d34abf81c6aa | |
48ff9bdcd8c7a1ebfa2e4d48bf04526a10fcf07de4c9cf2f0e8995d7426cd14b | |
aca51c209b2a8255849042859a3a9e86155c5b955aa8a9de233e55a4b654871d |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
# Copyright (c) nexB Inc. and others. All rights reserved. | |
# SPDX-License-Identifier: Apache-2.0 | |
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | |
# See https://github.com/aboutcode-org for support or download. | |
# See https://aboutcode.org for more information about AboutCode OSS projects. | |
# | |
import argparse | |
import hashlib | |
import tarfile | |
import shutil | |
from pathlib import Path | |
from dataclasses import dataclass | |
@dataclass
class Layer:
    """One image layer, described by the ``layer.tar`` file that stores it."""

    # Filesystem location of the layer tarball.
    path: Path
    # Name of the directory that directly contains the tarball.
    layer_id: str
    # SHA-256 hex digest of the tarball as it was when the Layer was built.
    old_sha256: str
    # SHA-256 hex digest after emptying; remains "" until empty_tarball() runs.
    new_sha256: str = ""
    # NOTE(review): not read or assigned by any code visible in this file.
    layer_number: int = 0

    @classmethod
    def from_tar(cls, layer_tar):
        """Return a Layer built from the path of a ``layer.tar`` file.

        The layer id is the name of the parent directory of ``layer_tar`` and
        the "old" digest is computed from the current content of the file.
        """
        parent_name = layer_tar.parent.name
        digest = sha2(layer_tar)
        return cls(path=layer_tar, layer_id=parent_name, old_sha256=digest)

    def empty_tarball(self):
        """Overwrite the layer tarball with an empty archive and record its digest."""
        # Opening in "w" mode truncates the file; closing right away leaves
        # a valid tar archive that has no members.
        tarfile.open(self.path, "w").close()
        self.new_sha256 = sha2(self.path)
def sha2(path, chunk_size=1024 * 1024):
    """Return the SHA-256 hex digest of the content of the file at ``path``.

    The file is read in blocks of ``chunk_size`` bytes rather than in one go,
    so that hashing a very large file (such as a multi-gigabyte layer tarball)
    does not require holding the whole file in memory.

    ``path`` may be a ``pathlib.Path`` or any other path accepted by ``open``.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def make_slim(image_tarball, layer_ids_to_empty):
    """Make an image slimmer, emptying the tarball of some layers.

    ``image_tarball`` is the path (``str`` or ``Path``) to an image tarball and
    ``layer_ids_to_empty`` is an iterable of layer ids, i.e. names of the
    directories that contain a ``layer.tar``.

    The image is unpacked in an "extracted" directory created in the current
    working directory (any previous one is deleted first). Each selected
    ``layer.tar`` is replaced by an empty tarball, then the config JSON file(s)
    and ``manifest.json`` are updated with the new digests. The result is
    archived as ``<image stem>-slim.tar`` in the current working directory.

    Raise FileNotFoundError if the extracted image has no config JSON file.
    """
    # Accept plain strings too: ``.stem`` is needed at the very end.
    image_tarball = Path(image_tarball)
    wanted_ids = set(layer_ids_to_empty)

    extracted = Path("extracted")
    print(f"Deleting previous {extracted!r}")
    shutil.rmtree(path=extracted, ignore_errors=True)
    extracted.mkdir(exist_ok=True)
    extracted = extracted.absolute()

    print(f"Extracting image to {extracted!r}")
    shutil.unpack_archive(image_tarball, extract_dir=extracted)

    # process layers
    # Select by directory name BEFORE building Layer objects: building one
    # hashes the whole tarball, which is wasted work for layers that are kept.
    to_empty = [
        Layer.from_tar(layer_tar)
        for layer_tar in extracted.rglob("layer.tar")
        if layer_tar.parent.name in wanted_ids
    ]
    print("Emptying Layers:")
    for layer in to_empty:
        print(f" {layer!r}")
        layer.empty_tarball()

    # update config(s)
    config_files = [
        candidate
        for candidate in extracted.glob("*.json")
        if candidate.name != "manifest.json"
    ]
    if not config_files:
        raise FileNotFoundError(f"No image config JSON file found in {extracted!r}")

    # Files are handled as bytes so that the digests are computed on exactly
    # what is written, whatever the platform encoding or newline convention.
    manifest_file = extracted / "manifest.json"
    manifest = manifest_file.read_bytes()

    for config_file in config_files:
        old_config = config_file.read_bytes()
        new_config = old_config
        for layer in to_empty:
            new_config = new_config.replace(
                f"sha256:{layer.old_sha256}".encode(),
                f"sha256:{layer.new_sha256}".encode(),
            )
        if new_config == old_config:
            # This JSON file references none of the emptied layers.
            continue

        old_config_sha256 = hashlib.sha256(old_config).hexdigest()
        new_config_sha256 = hashlib.sha256(new_config).hexdigest()
        config_file.write_bytes(new_config)
        # The config file is named after the digest of its own content.
        config_file.rename(extracted / f"{new_config_sha256}.json")

        # update manifest
        manifest = manifest.replace(
            old_config_sha256.encode(), new_config_sha256.encode()
        )

    manifest_file.write_bytes(manifest)

    # recreate image tarball
    shutil.make_archive(
        base_name=f"{image_tarball.stem}-slim",
        format="tar",
        root_dir=extracted,
        base_dir="",
    )
def slimify(argv=None):
    """Command-line entry point: parse the options and slim the image.

    ``argv`` is an optional list of command-line arguments; when it is None,
    argparse reads them from ``sys.argv``.

    The layers file is read as whitespace-separated layer ids. The slim image
    is written by ``make_slim`` as ``<image stem>-slim.tar``.
    """
    description = """
    Make an image slim, replacing some layer tarballs by empty tarballs. The new image will have a "-slim" name suffix.
    """
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "-i",
        "--image",
        dest="image_tarball",
        type=Path,
        required=True,
        metavar="FILE",
        help="Path to an image tarball, exported using 'docker save'",
    )
    parser.add_argument(
        "-l",
        "--layer-ids-to-skip",
        dest="layers_file",
        type=Path,
        required=True,
        metavar="FILE",
        help="Path to a file with one layer id to skip per line. "
        "The layer id is the name of the directory that contains a 'layer.tar' tarball.",
    )
    args = parser.parse_args(argv)
    image_tarball = args.image_tarball
    layers_file = args.layers_file

    # Report the name of the archive that make_slim() actually creates.
    print(
        f"Slimifying {image_tarball!r} to '{image_tarball.stem}-slim.tar' "
        f"skipping layers in {layers_file!r}"
    )

    # split() without arguments already ignores leading and trailing whitespace.
    layer_ids = layers_file.read_text().split()
    make_slim(image_tarball=image_tarball, layer_ids_to_empty=layer_ids)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    slimify()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script is designed for special use cases, like when trying to audit or scan very large docker images and you want to skip some layers, but still ensure that the image looks mostly OK.
You first need to
docker save
your image. This is designed ONLY for the docker save image format, not OCI or any other format. Then you need to list the layer ids you want to empty and wipe. These are the names of the parent folders that contain a layer.tar archive. List the layer ids, one per line, in a text file.
Finally, run this with this command. Be mindful that a new directory named "extracted" will be created (and wiped clean if it exists) with the extracted content of the image:
For instance, using a large PyTorch image from NVIDIA (large as in 22 GB):
The result will be a new
pytorch-24.01py3-slim.tar
image tarball that is just about 80MB.