Last active
January 8, 2023 16:43
-
-
Save pR0Ps/1b368e7de466d709af008225c60604dc to your computer and use it in GitHub Desktop.
Show binary diff of files while handling inserted/removed data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Shows a binary diff of files. Handles cases where data was inserted/removed | |
instead of just modified in place to avoid showing that the rest of a file | |
after a modification was changed. | |
WARNING: The algorithm used to generate the diff is quadratic in the expected | |
case and cubic in the worst case. Do not run this on large files unless you | |
want to wait for a *very* long time. Additionally, because it was only meant to | |
analyze small files, all data from the provided files will be read completely | |
into memory for analysis. | |
""" | |
import difflib | |
import itertools | |
import sys | |
# Number of byte cells rendered on each output row.
WIDTH = 16
assert WIDTH > 0

# Filler shown in the hex column for a cell that holds no byte; it must be as
# wide as a rendered byte ("XX").
PADDING_BYTES = "--"
assert len(PADDING_BYTES) == 2

# Filler shown in the ASCII column for a cell that holds no byte; it must be
# as wide as a rendered character.
PADDING_ASCII = "-"
assert len(PADDING_ASCII) == 1

# ANSI escape sequences used to highlight differing ranges.
BEFORE = "\033[31m"  # red
AFTER = "\033[32m"  # green
RESET = "\033[0m"  # reset
class Char:
    """One cell of the diff output: a byte value or an empty padding slot.

    Attributes:
        char: Integer byte value, or ``None`` when the cell is padding.
        color_set: Optional string emitted immediately before the cell.
        color_unset: Optional string emitted immediately after the cell.
    """

    # One instance is created per displayed byte, so avoid the per-instance
    # __dict__ to keep memory use down.
    __slots__ = ("char", "color_set", "color_unset")

    def __init__(self, char=None):
        self.char = char
        self.color_set = None
        self.color_unset = None

    def _wrap(self, text):
        """Return ``text`` surrounded by this cell's color markers, if any."""
        return "".join((self.color_set or "", text, self.color_unset or ""))

    def get_ascii(self):
        """Return the cell as a single display character plus color markers.

        Padding cells render as ``PADDING_ASCII``; values in the range 32..126
        render as the corresponding character; everything else renders as ".".
        """
        if self.char is None:
            glyph = PADDING_ASCII
        elif 32 <= self.char <= 126:
            glyph = chr(self.char)
        else:
            glyph = "."
        return self._wrap(glyph)

    def get_bytes(self):
        """Return the cell as two uppercase hex digits plus color markers.

        Padding cells render as ``PADDING_BYTES``.
        """
        if self.char is None:
            digits = PADDING_BYTES
        else:
            digits = "{:02X}".format(self.char)
        return self._wrap(digits)

    # The repr of a cell is its hex rendering.
    __repr__ = get_bytes
def print_output(line_num, data, start=None, end=None):
    """Print one output row for each of the two sides held in ``data``.

    Args:
        line_num: Zero-based row index; the printed offset is
            ``line_num * WIDTH`` in hexadecimal.
        data: Sequence whose first two items are sequences of cells exposing
            ``get_bytes()`` and ``get_ascii()``.
        start: Optional slice start applied to each side.
        end: Optional slice end applied to each side.
    """
    # Visible width of a full hex column: two digits per cell plus a single
    # space between neighbouring cells.
    hex_width = WIDTH * 3 - 1
    for side in range(2):
        cells = data[side][start:end]
        # Color escape sequences embedded in the rendered cells take no room
        # on screen, so the visible width is derived from the cell count
        # rather than from the length of the rendered string.
        visible = max(len(cells) * 3 - 1, 0)
        print(
            "{line:06X}|{data_bytes}{padding}|{data_ascii}".format(
                line=line_num * WIDTH,
                data_bytes=" ".join(c.get_bytes() for c in cells),
                # Every missing cell leaves three columns empty, not one;
                # pad to the full width so the ASCII column stays aligned
                # on a short (final) row.
                padding=" " * (hex_width - visible),
                data_ascii="".join(c.get_ascii() for c in cells),
            )
        )
def do_diff(data1, data2, show_colors=True):
    """Print an aligned hex/ASCII diff of ``data1`` and ``data2`` to stdout.

    The inputs are matched with ``difflib.SequenceMatcher``. For every opcode
    the shorter side is filled with padding cells so both sides advance in
    step, and the result is printed ``WIDTH`` cells per row, first side then
    second side.

    Args:
        data1: First sequence of byte values (e.g. ``bytes``).
        data2: Second sequence of byte values.
        show_colors: When true, attach color markers to the cells.
    """
    # Column header row followed by a ruler line.
    header_cells = [Char(col).get_bytes() for col in range(WIDTH)]
    print("offset|{headers}|ASCII".format(headers=" ".join(header_cells)))
    print("------+{}+{}".format("-" * (WIDTH * 3 - 1), "-" * WIDTH))

    matcher = difflib.SequenceMatcher(None, data1, data2, autojunk=False)
    sources = (data1, data2)
    pending = [[], []]  # cells of an incomplete row carried to the next opcode
    row = 0

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        spans = ((i1, i2), (j1, j2))
        chunk = [
            [Char(value) for value in source[lo:hi]]
            for source, (lo, hi) in zip(sources, spans)
        ]

        # Bring both sides to the same length with padding cells.
        fresh = max(len(chunk[0]), len(chunk[1]))
        for side in chunk:
            side.extend(Char() for _ in range(fresh - len(side)))

        carried = len(pending[0])
        if show_colors:
            palette = (RESET, RESET) if tag == "equal" else (BEFORE, AFTER)
            # A marker goes at the start of the range and at every position
            # where the output wraps onto a new row; the carried-over cells
            # shift where those wrap points fall. The cell just before each
            # marker closes the color (index -1 closes the end of the range).
            marks = [0]
            marks.extend(range(WIDTH - carried, fresh, WIDTH))
            for side, hue in zip(chunk, palette):
                for pos in marks:
                    side[pos].color_set = hue
                    side[pos - 1].color_unset = RESET

        # Put the carried-over cells in front of the new ones.
        chunk = [old + new for old, new in zip(pending, chunk)]

        # Emit every complete row and keep the remainder for later.
        total = len(chunk[0])
        complete, leftover = divmod(total, WIDTH)
        for n in range(complete):
            print_output(row, chunk, n * WIDTH, (n + 1) * WIDTH)
            row += 1
        pending = [side[total - leftover:] for side in chunk]

    # Flush a trailing partial row, if there is one.
    if pending[0]:
        print_output(row, pending)
def main():
    """Parse the command line, read both files and print their diff.

    ``--color`` / ``--no-color`` force colored output on or off; when neither
    is given, color is used only if stdout is a terminal.

    Returns:
        0 after the diff has been printed.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Show a colored binary diff of 2 files"
    )
    for positional in ("file1", "file2"):
        cli.add_argument(positional)
    cli.add_argument("--color", action=argparse.BooleanOptionalAction)
    opts = cli.parse_args()

    # No explicit choice on the command line: follow the terminal.
    use_color = sys.stdout.isatty() if opts.color is None else opts.color

    with open(opts.file1, "rb") as first, open(opts.file2, "rb") as second:
        do_diff(first.read(), second.read(), show_colors=use_color)
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output example