#!/usr/bin/env perl
|
|
|
use strict;
use warnings;

use JSON::XS;
use Path::Tiny;
use Sereal::Encoder qw( SRL_SNAPPY SRL_ZLIB SRL_UNCOMPRESSED );
use Text::Table;
|
|
|
# Bail out with the usage text when no directory argument was given. The
# message ends in a newline so die() does not append a file/line suffix.
die "Usage: json2sereal.pl <dir>\n\n Scans <dir> for .json files, converts to Sereal and compares sizes\n" unless @ARGV;
|
|
|
# Sereal encoders under comparison. The first three enable dedupe_strings and
# differ only in their compress setting; $enc_def is uncompressed and leaves
# dedupe_strings unset.
my %dedupe = (dedupe_strings => 1);

my $enc_snappy = Sereal::Encoder->new({ %dedupe, compress => SRL_SNAPPY });
my $enc_zlib   = Sereal::Encoder->new({ %dedupe, compress => SRL_ZLIB });
my $enc_none   = Sereal::Encoder->new({ %dedupe, compress => SRL_UNCOMPRESSED });
my $enc_def    = Sereal::Encoder->new({ compress => SRL_UNCOMPRESSED });
|
|
|
# Reference VPack size for each known benchmark file, keyed by file name.
# NOTE(review): presumably the best VelocyPack encoding size in bytes for the
# same document -- confirm against the VPack benchmark these were taken from.
# Files without an entry here are skipped by the conversion loop.
my %best_vpack = (
    'api-docs.json'       => 994_160,
    'commits.json'        => 20_789,
    'countries.json'      => 956_786,
    'directory-tree.json' => 244_716,
    'doubles.json'        => 899_982,
    'doubles-small.json'  => 89_998,
    'file-list.json'      => 133_536,
    'object.json'         => 118_630,
    'pass1.json'          => 804,
    'pass2.json'          => 51,
    'pass3.json'          => 108,
    'random1.json'        => 6_836,
    'random2.json'        => 5_815,
    'random3.json'        => 51_515,
    'sample.json'         => 153_187,
    'small.json'          => 30,
);
|
|
|
# Iterate over the entries of the directory named on the command line.
# NOTE(review): Path::Tiny's iterator does not descend into sub-directories
# unless asked to, so only top-level files are considered -- confirm intent.
my $it = path(@ARGV)->iterator;

# @rows gets one table row per converted file; %totals sums every size column
# (keys: json vpack def none snap zlib) for the summary row.
my (@rows, %totals);
my @size_columns = qw( json vpack def none snap zlib );

FILE:
while (my $file = $it->()) {
    my $name = $file->basename;
    next FILE unless $name =~ m/[.]json\z/ and $file->is_file;

    # Track success separately from the decoded value: a document whose top
    # level is JSON null can decode to undef without any error having occurred.
    my $data;
    my $parsed = eval { $data = decode_json($file->slurp_raw); 1 };
    if (!$parsed) {
        debug("Skip file '$name', could not JSON-parse it: $@");
        next FILE;
    }

    # Only files with a known VPack reference size can be compared.
    my $vpack_size = $best_vpack{$name};
    if (!$vpack_size) {
        debug("Skip file '$name', no VPack comparison");
        next FILE;
    }

    # Size of the JSON file on disk, the VPack reference, and the length of
    # the Sereal output produced by each encoder variant.
    my %size = (
        json  => $file->stat->size,
        vpack => $vpack_size,
        def   => length($enc_def->encode($data)),
        none  => length($enc_none->encode($data)),
        snap  => length($enc_snappy->encode($data)),
        zlib  => length($enc_zlib->encode($data)),
    );

    $totals{$_} += $size{$_} for @size_columns;

    push @rows, table_row($name, @size{@size_columns});
}
|
|
|
# Without at least one converted file every total is undefined and the
# percentage columns of the summary row would divide by zero, so stop with a
# clear message instead.
die "No .json files with a known VPack size found in '@ARGV'\n" unless @rows;

# Summary row: same column layout as the per-file rows, fed with the totals.
push @rows,
    table_row('-- Total --', @totals{qw( json vpack def none snap zlib )});

# Header: three fixed columns, then for every encoder variant its size column
# followed by the "% JSON" and "% VPack" ratio columns.
my $tb = Text::Table->new(
    'File',
    'JSON Size',
    'VPack best',
    map { ($_, '% JSON', '% VPack') } 'Defaults', 'No Compr', 'Snappy', 'ZLib',
);
$tb->load(@rows);
print $tb;
|
|
|
|
|
# debug(@parts)
#
# Writes "[DEBUG] ", the arguments interpolated as a list (joined with $",
# a single space by default) and a newline to STDERR. Does nothing and
# returns an empty list / undef unless the DEBUG environment variable holds
# a true value.
sub debug {
    my $enabled = $ENV{DEBUG};
    return if !$enabled;

    my $message = "@_";
    return print STDERR "[DEBUG] $message\n";
}
|
|
|
# table_row($label, $json_size, $vpack_size, $def, $none, $snap, $zlib)
#
# Builds one table row as an array reference of 15 cells: the label, the JSON
# size, the VPack size, and then for each of the four Sereal sizes (defaults,
# no compression, Snappy, ZLib) the size itself followed by that size as a
# percentage of the JSON size and as a percentage of the VPack size, each
# formatted as "%.2f%%".
#
# Undefined sizes are treated as 0. A percentage whose base is 0 is rendered
# as 'n/a' instead of dying with "Illegal division by zero", which is what a
# totals row built from no files would otherwise trigger.
sub table_row {
    my ($label, $json_size, $vpack_size, $def, $none, $snap, $zlib) = @_;

    # Normalise missing values so both the size cells and the ratios are defined.
    my ($json_base, $vpack_base, @sereal_sizes) =
        map { defined $_ ? $_ : 0 } $json_size, $vpack_size, $def, $none, $snap, $zlib;

    # Formats $size relative to $base, guarding against a zero base.
    my $percent_of = sub {
        my ($size, $base) = @_;
        return 'n/a' if $base == 0;
        return sprintf('%.2f%%', $size / $base * 100);
    };

    my @cells = ($label, $json_base, $vpack_base);
    for my $size (@sereal_sizes) {
        push @cells,
            $size,
            $percent_of->($size, $json_base),
            $percent_of->($size, $vpack_base);
    }

    return \@cells;
}