Last active
August 25, 2024 21:36
-
-
Save scottchiefbaker/0747e6b6a6c66abe0d27e972d3ddac8e to your computer and use it in GitHub Desktop.
FFMPEG reencode helper script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use Data::Dump::Color; | |
use File::Basename; | |
use Getopt::Long; | |
use JSON::PP; | |
use Cwd 3.75 qw(abs_path); | |
use Time::HiRes qw(time); | |
use Encode; | |
binmode(STDOUT, ":utf8"); | |
############################################################################### | |
############################################################################### | |
# ---------------------------------------------------------------------------
# Option defaults. Each of these can be overridden from the command line.
# ---------------------------------------------------------------------------
my $crf;                    # Only passed to ffmpeg when set
my $outdir        = "";
my $preset        = "";
my $height        = 0;      # 0 means "leave the resolution alone"
my $hevc          = 1;      # HEVC is the default video codec
my $h264          = 0;
my $sample        = 0;
my $quiet         = 0;
my $debug         = 0;
my $keep          = 0;
my $keep_surround = 0;
my $info          = 0;
my $verbose       = 0;
my $eight_bit     = 0;
my $dry           = 0;
my $yify          = 0;
my $sanitize      = 0;
my $aac           = 0;
my ($res_480, $res_720, $res_1080);

# Output options shared by every ffmpeg invocation
my @params;

# Used for the "Compressed N files in ..." summary at the end of the run
my $script_start = time();

my $ok = GetOptions(
    "outdir=s"   => \$outdir,
    "crf=i"      => \$crf,
    "height|h=i" => \$height,
    "hevc"       => \$hevc,
    "h264"       => \$h264,
    "sample"     => \$sample,
    "preset=s"   => \$preset,
    "quiet"      => \$quiet,
    "debug"      => \$debug,
    "keep"       => \$keep,
    "surround"   => \$keep_surround,
    "info"       => \$info,
    "verbose"    => \$verbose,
    "480p"       => \$res_480,
    "720p"       => \$res_720,
    "1080p"      => \$res_1080,
    "8bit"       => \$eight_bit,
    "dry"        => \$dry,
    "yify"       => \$yify,
    "sanitize"   => \$sanitize,
    "aac"        => \$aac,
);

# Unknown or malformed options: bail out with the usage line
die(usage()) unless $ok;

# Asking for h264 switches the HEVC default off
$hevc = 0 if $h264;

# Resolution shortcuts. When several are given 480p beats 720p beats 1080p,
# and with none of them --height (or its default) is left untouched
$height = $res_480  ? 480
        : $res_720  ? 720
        : $res_1080 ? 1080
        :             $height;

# If we're outputting to another directory, keep is implied
$keep = 1 if $outdir;

# --yify is a bundle of settings: keep the source, 720p HEVC, stereo audio, CRF 26
if ($yify) {
    ($keep, $height, $hevc, $keep_surround, $crf) = (1, 720, 1, 0, 26);
}
# Collect the command line arguments that are regular, readable files.
# Each argument is decoded from UTF-8 in place before it is tested.
my @files;
for my $arg (@ARGV) {
    $arg = Encode::decode('utf8', $arg);

    if (!-f $arg) {
        printf("%sWarning:%s Skipping '%s' as it's not a file\n", color("yellow"), color("reset"), $arg);
    }
    elsif (!-r $arg) {
        printf("%sWarning:%s Skipping '%s' as it's not readable\n", color("yellow"), color("reset"), $arg);
    }
    else {
        push(@files, $arg);
    }
}

# Nothing usable was given: print the usage line, then fall back to showing
# info for the .mp4/.mkv files in the current directory. That fallback only
# applies when no arguments were passed at all; otherwise exit with code 9.
if (!@files) {
    print "Usage: reencode [file1.mp4] [file2.mkv] ...\n";

    @files = map { Encode::decode('utf8', $_) } sort(glob("./*.mp4"), glob("./*.mkv"));

    exit(9) if (@ARGV || !@files);

    print "\n";
    $info = 1;
}
# Build the ffmpeg output options that are identical for every input file.
# $vid_codec is only the label shown in the "== OUTPUT ==" report; when HEVC
# is off no -c:v option is added and ffmpeg picks its own default encoder.
my $vid_codec = "h264";
if ($hevc) {
    push(@params, "-c:v", "hevc");
    $vid_codec = "hevc";
}

# Test for definedness, not truth: "--crf 0" is a value the user may
# legitimately ask for and must not be silently dropped
if (defined($crf)) {
    push(@params, "-crf", "$crf");
}

if ($preset) {
    push(@params, "-preset", $preset);
}

# Sample only does the first X seconds
if ($sample) {
    push(@params, "-t", "90");

    # We definitely keep the original file if we're only making a sample
    $keep = 1;
}

# Silence ffmpeg's own logging unless --debug was requested
if (!$debug) {
    push(@params, "-loglevel", "quiet");
}

# Show the ffmpeg encoding progress line unless --quiet was requested
if (!$quiet) {
    push(@params, "-stats");
}

# Force 8 bit output
if ($eight_bit) {
    push(@params, "-pix_fmt", "yuv420p");
}
# Running byte totals of the inputs/outputs that were re-encoded successfully.
# They are reported in the multi-file summary after the loop.
my $total_size       = 0;
my $total_compressed = 0;

# Re-encode every input file in turn
foreach my $infile (@files) {
    my $base = basename($infile);
    my $sum  = show_video_summary($infile);

    # show_video_summary() prints a "Skipping" warning and returns an empty
    # hash when the file has no video stream, so honor that and move on
    my @vid_streams = @{ $sum->{video} || [] };
    if (!@vid_streams) {
        next;
    }

    # If we're doing info, don't process anything else
    if ($info) {
        next;
    }

    # Per-file video options. These must not be pushed onto the shared
    # @params: doing so adds another "-vf" for every file processed, and a
    # height clamped for one small file would leak into all following files.
    my @vid_params;
    my $target_height = $height;
    my $orig_height   = $vid_streams[0]->{height} || 0;

    if ($orig_height > 0 && $orig_height < $target_height) {
        my $orange = color('orange');
        my $reset  = color();

        print $orange . "Warning:$reset disabling upscaling ($orig_height < $target_height)\n";
        $target_height = $orig_height;
    }

    # Scale the video if applicable
    if ($target_height > 100) {
        push(@vid_params, "-vf", "scale=-2:$target_height");
    }

    my $aud_channels = $sum->{audio}->[0]->{channels} || 0;
    my $is_surround  = ($aud_channels > 2);

    my @aud_params;
    my $aud_codec;
    if ($aac || ($is_surround && !$keep_surround)) {
        # Default to aac @ 128Kb/s with two audio channels
        push(@aud_params, "-c:a", "aac");
        push(@aud_params, "-b:a", "128k");
        push(@aud_params, "-ac", "2");

        $aud_codec = "2.0 [aac @ 128Kb/s]";
    } else {
        push(@aud_params, "-c:a", "copy");

        $aud_codec = "*COPY*";
    }

    $base =~ s/\.(mp4|avi|mkv)//ig;

    # Remove some common strings from the file name
    $base =~ s/(1080p|\d\d\dp|HEVC|x265|x264|h.264|-TBS|-strife|-MeGusta|-mSD|-NhaNc3)//gi;

    if ($sanitize) {
        $base = sanitize($base,'.');
    } else {
        # Remove and double/triple "." from the filename
        $base =~ s/\.+/./ig;
        # Remove ".-"
        $base =~ s/\.-/./ig;
        # Remove "()"
        $base =~ s/\(\)//ig;
    }

    my @prefix = ("ffmpeg", "-y", "-i", $infile);

    # If we don't have a specified outdir, we use the same dir the input file is in
    my $file_outdir = length($outdir) ? $outdir : dirname($infile);

    # Make sure there is exactly one trailing /
    $file_outdir =~ s|/*\z|/|;

    # Make sure we can write to the output dir. The path is passed as a
    # sprintf argument so a "%" in a directory name cannot corrupt the message.
    if (!(-d $file_outdir && -w _)) {
        my $str = sprintf("\n%sError:%s %s is not writable\n", color('red'), color('reset'), $file_outdir);
        die($str);
    }

    $base = trim($base);

    # Build the name as <dir>/<base>[.hevc][.tablet].mkv. Appending the tags
    # (instead of s/\.mkv/.../g on the whole path) keeps a directory whose
    # name happens to contain ".mkv" from being rewritten.
    my $outfile = $file_outdir . $base;
    if ($hevc) {
        $outfile .= ".hevc";
    }
    if ($yify) {
        $outfile .= ".tablet";
    }
    $outfile .= ".mkv";

    my @cmd_full = (@prefix, @aud_params, @params, @vid_params, $outfile);

    # Are the input/output filenames/paths the same
    my $in_abs  = abs_path($infile)  // '';
    my $out_abs = abs_path($outfile) // '';
    if ($in_abs ne '' && $in_abs eq $out_abs) {
        printf("%sWarning:%s Skipping '%s' because the input/output file names are the same\n", color("yellow"), color("reset"), $base);
        next;
    }

    # Display-only copy of the command: put quotes around anything with a *space* in it
    my @escaped = map { / / ? "'$_'" : $_ } @cmd_full;

    if ($dry) {
        my $cmd = join(" ", @escaped);
        print "CMD: $cmd\n\n";
        exit(7);
    }

    # Print out the full command params, and then run it
    my $start = time();
    print "\n";

    if ($verbose) {
        dd(\@cmd_full);
    }

    printf("== OUTPUT ==\n");
    printf(" File : '%s'\n", $outfile);
    printf(" Video : [%s]\n", $vid_codec);
    printf(" Audio : %s\n", $aud_codec);

    # Report the CRF that is really on the command line (if there is one)
    my ($crf_idx) = grep { $params[$_] eq "-crf" } (0 .. $#params);
    if (defined($crf_idx) && defined($params[$crf_idx + 1])) {
        printf(" CRF : %s\n", $params[$crf_idx + 1]);
    }

    printf(" Subtitle : %s\n", "*COPY*");
    print "\n";

    system(@cmd_full);

    # Look at the whole wait status: "$? >> 8" alone is 0 when ffmpeg was
    # killed by a signal, which would make an aborted run look successful
    my $failed = ($? != 0);
    print "\n";

    if ($failed) {
        dd(\@cmd_full);
    }

    # See if the original/encoded file length is the same
    # This is to check if the encode was completed successfully
    my $orig_length    = get_video_length($infile);
    my $encoded_length = get_video_length($outfile);
    my $lengths_known  = defined($orig_length) && defined($encoded_length);

    # A little difference is OK. We say within 5 seconds is OK
    my $diff    = abs(($orig_length // 0) - ($encoded_length // 0));
    my $elapsed = human_time(time() - $start);

    if (!$failed && $lengths_known && $diff < 5) {
        my $in_size   = filesize($infile)  // 0;
        my $out_size  = filesize($outfile) // 0;
        my $diff_size = int(($in_size - $out_size) / 1024 / 1024);

        $total_size       += $in_size;
        $total_compressed += $out_size;

        # The original is only removed after a verified, error free encode
        if (!$keep) {
            unlink($infile) or warn("Unable to remove '$infile': $!\n");
        }

        my $final_size = ($out_size / 1024 / 1024);
        printf("%sReencode successful:%s %s, %0.1fMB (saved %s%0.1fMB%s)\n", color("green"), color("reset"), $elapsed, $final_size, color(43), $diff_size, color('reset'));
    } elsif (!$failed && $sample) {
        # A sample is shorter than the source on purpose, so report the
        # elapsed encode time rather than the length difference
        printf("%sInfo:%s Sample file generated in %s\n", color("green"), color("reset"), $elapsed);
    } else {
        printf("%sError:%s Something went wrong with the encode. Output length difference: %s\n", color("red"), color("reset"), human_time($diff));
    }
}
# After a multi-file run print a one line recap of the time taken and the
# before/after sizes. Nothing is printed for a single file or in --info mode.
my $count = @files;
if ($count > 1 && !$info) {
    my $elapsed = human_time(time() - $script_start);
    my $before  = color(43, human_size($total_size));
    my $after   = color(229, human_size($total_compressed));

    print "\n";
    print "Compressed $count files in $elapsed. $before compressed down to $after\n";
}
############################################################################### | |
############################################################################### | |
# Return whatever Perl's -s file test yields for the given path: the size in
# bytes, or undef when the file cannot be stat()ed (e.g. it does not exist).
sub filesize {
    my ($path) = @_;

    return scalar(-s $path);
}
# Minimal @ARGV parser.
#
# Every "-k" / "--key" style item becomes a hash key. If the item following
# it does not look like another option it is stored as the value, otherwise
# the key is simply counted. Items such as "-abc" (dash followed by two or
# more word characters) are ignored.
#
# With a (true) argument the value stored for that key is returned, without
# one a hashref of everything found is returned.
sub argv {
    my %seen;

    foreach my $idx (0 .. $#ARGV) {
        my $arg = $ARGV[$idx];

        # If the item starts with "-" it's a key
        my ($name) = $arg =~ /^--?([a-zA-Z_]\w*)/;
        next if !defined($name) || $arg =~ /^-\w\w/;

        # If the next item does not start with "--" it's the value for this item
        my $following = $ARGV[$idx + 1];
        if (defined($following) && $following !~ /^--?\D/) {
            $seen{$name} = $following;
        }
        # Bareword like --verbose with no options
        else {
            $seen{$name}++;
        }
    }

    # We're looking for a certain item
    return $seen{$_[0]} if $_[0];

    return \%seen;
}
# Strip leading and trailing whitespace.
#
# In list context every argument is trimmed and the list of results is
# returned. In scalar (or void) context only the first argument is handled.
sub trim {
    if (!wantarray) {
        my $text = shift();

        return "" if length($text) == 0;

        $text =~ s/^\s*//;
        $text =~ s/\s*$//;

        return $text;
    }

    # List context: trim each item on its own, forcing the scalar branch above
    return map { scalar(trim($_)) } @_;
}
# Debug helpers, installed at compile time.
#
# k()  dumps its arguments with Data::Dump::Color when that module can be
#      loaded, and with Data::Dumper otherwise.
# kd() does the same, then prints where it was called from and exits with
#      status 15.
BEGIN {
    my $have_color = eval { require Data::Dump::Color };
    require Data::Dumper unless $have_color;

    *k = $have_color
        ? sub { Data::Dump::Color::dd(@_) }
        : sub { print Data::Dumper::Dumper(\@_) };

    sub kd {
        k(@_);

        # caller() yields (package, filename, line); only the last two are printed
        printf("Died at %2\$s line #%3\$s\n",caller());
        exit(15);
    }
}
# Build an ANSI 256-color escape sequence.
#
# $str  Color specification. String format: '115', '165_bold', '10_on_140',
#       'reset', 'on_173', 'red', 'white_on_blue'. Numbers are 256-color
#       palette indexes, a few color names are mapped to indexes below, and
#       "on_N" selects a background. An empty/missing spec or 'reset'
#       returns the reset sequence.
# $txt  Optional text. When given (including "0") it is appended to the
#       escape sequence and followed by a reset, so the result can be printed
#       as is. Without text only the opening sequence is returned.
sub color {
    my ($str, $txt) = @_;

    # If we're NOT connected to a an interactive terminal don't do color
    #if (-t STDOUT == 0) { return $txt // ''; }

    # No string sent in, so we just reset
    if (!defined($str) || !length($str) || $str eq 'reset') { return "\e[0m"; }

    # Some predefined colors
    my %color_map = qw(red 160 blue 27 green 34 yellow 226 orange 214 purple 93 white 15 black 0);
    $str =~ s|([A-Za-z]+)|$color_map{$1} // $1|eg;

    # Get foreground/background and any commands
    my ($fc, $cmd) = $str =~ /^(\d{1,3})?_?(\w+)?$/;
    my ($bc)       = $str =~ /on_(\d{1,3})$/;

    # Some predefined commands
    my %cmd_map = qw(bold 1 italic 3 underline 4 blink 5 inverse 7);
    my $cmd_num = $cmd_map{$cmd // 0};

    my $ret = '';
    if ($cmd_num)     { $ret .= "\e[${cmd_num}m"; }
    if (defined($fc)) { $ret .= "\e[38;5;${fc}m"; }
    if (defined($bc)) { $ret .= "\e[48;5;${bc}m"; }

    # Test length rather than truth so a text of "0" is not thrown away
    if (defined($txt) && length($txt)) { $ret .= $txt . "\e[0m"; }

    return $ret;
}
# Return the "duration" field of the format section that get_video_info()
# reports for the file (undef when it is not present).
sub get_video_length {
    my ($path) = @_;

    return get_video_info($path)->{format}->{duration};
}
# Return the "height" field of the first video stream that get_video_info()
# reports for the file (undef when there is no video stream).
sub get_video_height {
    my ($path) = @_;

    my ($first_video) = find_stream(get_video_info($path), 'video');

    return $first_video->{height};
}
# Run ffprobe on a file and return its JSON report (format + streams) as a
# decoded Perl data structure.
#
# ffprobe is started with a list-form pipe open so the file name is handed
# over as a single argument and never passes through a shell. File names
# containing quotes, "$" or backticks are therefore safe.
#
# Dies when ffprobe cannot be started or when its output is not valid JSON.
# An entirely empty output yields an empty hashref.
sub get_video_info {
    my $file = shift();

    my @cmd = ("ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", $file);

    open(my $fh, "-|", @cmd) or die("Unable to run ffprobe: $!\n");
    my $out = do { local $/; <$fh> };

    # ffprobe's exit status is deliberately not checked; callers work off
    # whatever fields are (or are not) present in the report
    close($fh);

    if (!defined($out) || $out !~ /\S/) {
        return {};
    }

    my $x = decode_json($out);

    return $x;
}
# Print the "== INPUT ==" report for a file: name, size, overall bitrate,
# length, and one line per video, audio and subtitle stream.
#
# Returns a hashref with the keys video, audio and subtitle, each holding an
# arrayref of the matching stream records from get_video_info(). When the
# file has no video stream a warning is printed and an empty hashref is
# returned instead.
sub show_video_summary {
    my ($file) = @_;

    my $probe  = get_video_info($file);
    my $length = human_time($probe->{format}->{duration});

    my @video = find_stream($probe, 'video');
    my @audio = find_stream($probe, 'audio');
    my @subs  = find_stream($probe, 'subtitle');

    unless (@video) {
        print color("red", "WARNING:") . " Skipping " . color('yellow', $file) . " because it's not video\n";
        return {};
    }

    my $summary = {
        video    => \@video,
        audio    => \@audio,
        subtitle => \@subs,
    };

    my $size_str = human_size(filesize($file) || 0);
    my $rate_str = color('yellow', human_size($probe->{format}->{bit_rate} || 0) . "b/s");

    print color('orange', "== INPUT ==") . "\n";
    printf(" File : '%s'\n", color('white', basename($file)));
    printf(" Size : %s (%s)\n", $size_str, $rate_str);
    printf(" Length : %s\n", $length);

    foreach my $stream (@video) {
        # Streams without a bit_rate field fall back to 1024, i.e. 1.024
        # after the division, which lands in the "no bitrate shown" branch
        my $kbps  = ($stream->{bit_rate} // 1024) / 1000;
        my $depth = color('87', is_10bit($stream->{pix_fmt} // "") ? "10bit" : "8bit");
        my $res   = get_resolution_string($stream->{width}, $stream->{height});
        my $codec = color('green', $stream->{codec_name});

        if ($kbps > 2) {
            printf(" Video : %s @ %s [%s] (%s)\n", $res, color('yellow', $kbps . "Kb/s"), $codec, $depth);
        } else {
            printf(" Video : %s [%s] (%s)\n", $res, $codec, $depth);
        }
    }

    foreach my $stream (@audio) {
        my $kbps   = ($stream->{bit_rate} // 0) / 1000;
        my $layout = $stream->{channels} == 2 ? "2.0"
                   : $stream->{channels} == 6 ? "5.1"
                   : $stream->{channels} == 1 ? "Mono"
                   :                            "??";
        my $codec  = color('green', $stream->{codec_name});

        if ($kbps > 2) {
            printf(" Audio : %s @ %s [%s]\n", $layout, color('yellow', $kbps . "Kb/s"), $codec);
        } else {
            printf(" Audio : %s [%s]\n", $layout, $codec);
        }
    }

    foreach my $stream (@subs) {
        # The stream's title tag is shown in parentheses when there is one
        my $title = $stream->{tags}->{title} || "";

        if ($title) {
            printf(" Subtitle : [%s] (%s)\n", $stream->{codec_name}, $title);
        } else {
            printf(" Subtitle : [%s]\n", $stream->{codec_name});
        }
    }

    unless (@subs) {
        print " Subtitle : " . color("white", "NONE") . "\n";
    }

    return $summary;
}
# Return the entries of $probe->{streams} whose codec_type equals the
# requested type, in their original order.
sub find_stream {
    my ($probe, $wanted) = @_;

    my @matches;
    for my $stream (@{ $probe->{streams} }) {
        next unless $wanted eq $stream->{codec_type};
        push(@matches, $stream);
    }

    return @matches;
}
# Convert a number of seconds into text such as "1 hours 2 minutes 5 seconds".
#
# Units with a zero count are left out. A missing value, or anything shorter
# than one second, is reported as "0 seconds" rather than an empty string so
# that messages built around the result never end up with a hole in them.
sub human_time {
    my $secs = shift() // 0;

    # [ label, span the value is wrapped to first (undef = none), unit size ]
    my @units = (
        [ "years",   undef,    31536000 ],
        [ "months",  31536000, 2628000  ],
        [ "days",    2628000,  86400    ],
        [ "hours",   86400,    3600     ],
        [ "minutes", 3600,     60       ],
        [ "seconds", 60,       1        ],
    );

    my @parts;
    foreach my $unit (@units) {
        my ($label, $wrap, $size) = @$unit;

        my $remainder = defined($wrap) ? ($secs % $wrap) : $secs;
        my $count     = int($remainder / $size);

        if ($count > 0) {
            push(@parts, "$count $label");
        }
    }

    if (!@parts) {
        return "0 seconds";
    }

    return join(" ", @parts);
}
# Format a byte count using binary units: one decimal place with a T, G, M or
# K suffix once the value is strictly larger than the unit, whole bytes with
# a "B" suffix below that, and a plain 0 for anything that is not positive.
sub human_size {
    my $size = shift();

    # Largest unit first; the first one the size exceeds wins
    foreach my $unit ([ 4, "T" ], [ 3, "G" ], [ 2, "M" ], [ 1, "K" ]) {
        my ($power, $suffix) = @$unit;
        my $limit = 1024**$power;

        if ($size > $limit) {
            return sprintf("%.1f" . $suffix, $size / $limit);
        }
    }

    if ($size > 0) {
        return sprintf("%dB",$size);
    }

    return 0;
}
# Return 1 when the given ffmpeg pixel format name describes 10 bit video and
# 0 otherwise.
#
# Any name ending in "10le" or "10be" is accepted (yuv420p10le, yuv422p10le,
# yuv444p10le, p010le, ...), not just yuv420p10le. A missing name counts as
# not 10 bit.
sub is_10bit {
    my $pix_format = shift() // "";

    return ($pix_format =~ /10(?:le|be)\z/) ? 1 : 0;
}
# Turn a width/height pair into a short label: "1080p" for 1920x1080 and
# 1920x800, "720p" for 1280x720, "480p" for 720x480, and "<width>x<height>"
# for everything else.
sub get_resolution_string {
    my ($width, $height) = @_;

    return "1080p" if $width == 1920 && ($height == 1080 || $height == 800);
    return "720p"  if $width == 1280 && $height == 720;
    return "480p"  if $width == 720  && $height == 480;

    return $width . "x" . $height;
}
# One line usage text, prefixed with the name the script was started as.
# There is no trailing newline.
sub usage {
    return sprintf("%s filename.mp4 [--keep] [--720p] [--sample]", $0);
}
# Collapse every run of non-word characters and underscores in a string into
# a separator, then drop separators from both ends.
#
# $str  The string to clean up; undef is returned for an undefined string.
# $sep  The separator to insert, "_" when not given.
sub sanitize {
    my ($str, $sep) = @_;
    $sep //= "_";

    if (!defined($str)) {
        return undef;
    }

    # Convert multiple non-word sequences to the separator
    $str =~ s/[\W_]+/$sep/g;

    # The separator is a literal character, hence the \Q...\E quoting.
    # Remove any separators at the beginning and end
    $str =~ s/\A\Q$sep\E+//;
    $str =~ s/\Q$sep\E+\z//;

    return $str;
}
# vim: filetype=perl tabstop=4 shiftwidth=4 autoindent softtabstop=4 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment