Created
June 22, 2021 10:58
-
-
Save Vovan-VE/18f8cab84f699bc70bb40bd1b1b51898 to your computer and use it in GitHub Desktop.
Merge series of single row INSERT from `mysqldump` into series of multi-INSERT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use 5.010; | |
use Getopt::Long; | |
sub usage(;$) { | |
my ($exit) = @_; | |
say 'Usage'; | |
say " $0 [--max-length=<length>]"; | |
say ''; | |
say 'Description'; | |
say ''; | |
say ' Reads input and writes to output.'; | |
exit($exit) if defined $exit; | |
} | |
sub match($$) { | |
my ($str, $re) = @_; | |
return unless $str =~ /($re)/; | |
return { | |
str => $&, | |
len => length $&, | |
from => $-[0], | |
to => $+[0], | |
}; | |
} | |
my %opt = ( | |
'max-length' => 32768, | |
); | |
Getopt::Long::Configure("bundling"); | |
GetOptions(\%opt, | |
"help|h|?", | |
"max-length=i", | |
) | |
or usage(2); | |
usage(0) if defined $opt{help}; | |
my $re_name = qr/`[^`]++`|"[^"]++"/; | |
my $re_start = qr/ | |
^ INSERT \h+ INTO \h+ $re_name \h* | |
(?: | |
\( | |
$re_name \h* | |
(?: | |
, \h* | |
$re_name \h* | |
)* | |
\) \h* | |
)? | |
VALUES \h* | |
/x; | |
my $re_end = qr/;$/; | |
use constant { | |
REPLACE_START => ',', | |
REPLACE_END => '', | |
}; | |
my $in_first; | |
my $in_second; | |
my $out_first; | |
my $out_second; | |
my $first_begins; | |
my $second_begins; | |
my $first_ends; | |
my $second_ends; | |
my $joined_length; | |
binmode STDIN, ":bytes"; | |
binmode STDOUT, ":bytes"; | |
while (<STDIN>) { | |
chomp; | |
print $out_first if defined $out_first; | |
my $line = $_; | |
($in_first, $in_second) = ($in_second, $line); | |
($out_first, $out_second) = ($out_second, $line); | |
# skip step on first input line where no prev line is | |
next unless defined $in_first; | |
($first_begins, $second_begins) = ($second_begins, match($in_second, $re_start)); | |
($first_ends, $second_ends) = ($second_ends, match($in_second, $re_end)); | |
if ($first_begins && $first_ends && $second_begins) { | |
if ($first_begins->{str} eq $second_begins->{str}) { | |
my $new_join_length = $joined_length // length $out_first; | |
$new_join_length += length(REPLACE_END) - length($first_ends->{str}); | |
$new_join_length += length($out_second) - length($second_begins->{str}); | |
$new_join_length += length REPLACE_START; | |
if ($new_join_length < $opt{'max-length'}) { | |
substr($out_first, -$first_ends->{len}) = REPLACE_END; | |
substr($out_second, $second_begins->{from}, $second_begins->{len}) = REPLACE_START; | |
$joined_length = $new_join_length; | |
next; | |
} | |
} | |
} | |
undef $joined_length; | |
$out_first .= "\n"; | |
} | |
print $out_first if defined $out_first; | |
print $out_second if defined $out_second; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment