Created
August 30, 2013 08:34
-
-
Save zakame/6387625 to your computer and use it in GitHub Desktop.
Benchmarking a few ways of splitting some lines and inserting into a hash
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use warnings; | |
use strict; | |
use Benchmark qw(:all); | |
use Text::CSV_XS; | |
# input file: 1_000_000 lines of the form 'test|N,xxxN|noob', where N is a number
my $file = '/tmp/test.txt'; | |
my $readable_split = sub { | |
my %h; | |
open my $fh, '<', $file | |
or die "Can't open $file: $!"; | |
while ( my $line = <$fh> ) { | |
my @fields = split /\|/ => $line; | |
# get the first item, ignore the rest (note parens in lvalue) | |
my ($key) = split /\,/ => $fields[1]; | |
$h{$key} = $line; | |
} | |
}; | |
my $cascaded_split = sub { | |
my %h; | |
open my $fh, '<', $file | |
or die "Can't open $file: $!"; | |
while ( my $line = <$fh> ) { | |
my $key = ( split /,/ => ( split /\|/ => $line )[1] )[0]; | |
$h{$key} = $line; | |
} | |
}; | |
my $csv_xs_split = sub { | |
my %h; | |
my $csv = Text::CSV_XS->new( { sep_char => '|' } ) | |
or die "Cannot use CSV: ", Text::CSV_XS->error_diag; | |
open my $fh, '<', $file | |
or die "Can't open $file: $!"; | |
while ( my $line = $csv->getline($fh) ) { | |
my ($key) = split /,/ => $line->[1]; | |
$h{$key} = join '|' => @$line; | |
} | |
}; | |
# run these splits on that file several times | |
my $results = timethese( | |
10, | |
{ readable_split => $readable_split, | |
cascaded_split => $cascaded_split, | |
csv_xs_split => $csv_xs_split, | |
} | |
); | |
cmpthese($results); | |
__END__ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
zakame@yorozuya:/tmp% perl bench_hash.pl | |
Benchmark: timing 10 iterations of cascaded_split, csv_xs_split, readable_split... | |
cascaded_split: 21 wallclock secs (21.01 usr + 0.13 sys = 21.14 CPU) @ 0.47/s (n=10) | |
csv_xs_split: 49 wallclock secs (48.62 usr + 0.12 sys = 48.74 CPU) @ 0.21/s (n=10) | |
readable_split: 27 wallclock secs (26.90 usr + 0.10 sys = 27.00 CPU) @ 0.37/s (n=10) | |
s/iter csv_xs_split readable_split cascaded_split | |
csv_xs_split 4.87 -- -45% -57% | |
readable_split 2.70 81% -- -22% | |
cascaded_split 2.11 131% 28% -- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment