Skip to content

Instantly share code, notes, and snippets.

@anazawa
Last active August 29, 2015 14:27
Show Gist options
  • Save anazawa/a6166996afc74ff1c34f to your computer and use it in GitHub Desktop.
Save anazawa/a6166996afc74ff1c34f to your computer and use it in GitHub Desktop.
Fetch Takaaki Yoshimoto speech scripts (http://www.1101.com/yoshimoto_voice/)
#!/usr/bin/env perl
use strict;
use warnings;
use autodie qw/rename/;
use Archive::Zip qw/AZ_OK/;
use Getopt::Long qw/GetOptions/;
use HTTP::Tiny;
use Path::Class qw/tempdir/;
use Pod::Usage qw/pod2usage/;
use Time::HiRes qw/sleep/;
# Entry point: parse global options, then dispatch to the sub-command named
# by the first remaining argument. A missing or unknown command runs `help`.
my %options;

# GetOptions returns false on a malformed command line (unknown option,
# missing value for --dir). Stop with a usage error instead of carrying on
# with whatever happened to be parsed.
GetOptions(\%options, 'dir=s', 'help|h') or pod2usage(2);

# The POD documents a -help option, so honour it regardless of the command.
my $command = $options{help} ? 'help' : (shift(@ARGV) || 'help');

my %handler_for = (
    help  => \&help,
    fetch => \&fetch,
);
my $handler = $handler_for{$command} || \&help;

# Handlers receive a copy of the remaining arguments plus the option hash.
$handler->([@ARGV], \%options);
# help(): print usage information taken from this script's POD and exit
# with status 1. Any arguments handed over by the dispatcher are ignored.
sub help {
    pod2usage(-exitval => 1);
}
# fetch(\@ids, \%options)
#
# Download speech transcripts by identifier.
#   $argv    - array ref of speech ids; when empty, A001 .. A183 are used.
#   $options - hash ref; `dir` names the output directory
#              (default: 'yoshimoto-speech-scripts').
#
# A single id is printed to STDOUT. Several ids are written as "<id>.txt"
# into a staging directory that is renamed to the output directory once all
# downloads have finished. Ids for which _fetch() returns nothing produce no
# file. Dies if the staging directory cannot be created or if the final
# rename fails (rename is covered by autodie). Returns nothing.
sub fetch {
    my ($argv, $options) = @_;

    my @ids = @$argv ? @$argv : map { sprintf 'A%03d', $_ } 1 .. 183;

    if (@ids == 1) {
        my $contents = _fetch($ids[0]);
        print $$contents if $contents;
        return;
    }

    my $dir = $options->{dir} || 'yoshimoto-speech-scripts';

    # Stage next to the destination rather than in the system temp directory:
    # rename() cannot cross filesystem boundaries, and the system temp
    # directory is frequently a separate mount, which would make the final
    # rename die after every download had already completed.
    my $parent  = Path::Class::Dir->new($dir)->parent;
    my $tempdir = tempdir(CLEANUP => 1, DIR => "$parent");

    for my $id (@ids) {
        my $contents = _fetch($id);
        $tempdir->file("$id.txt")->spew($$contents) if $contents;

        # Pause briefly between requests to go easy on the server.
        sleep 0.1;
    }

    # Publish the complete result in one step; on failure CLEANUP removes
    # the staging directory at exit.
    rename $tempdir => $dir;

    return;
}
# _fetch($id)
#
# Download "<id>.zip" from the speech download URL and return a reference to
# the uncompressed contents of the first member whose name ends in .txt or
# .text.
#
# Returns nothing (empty list / undef) when $id is not a valid identifier
# (the letter A or F followed by exactly three ASCII digits) or when the
# server answers 404; a warning is emitted in both cases.
# Dies on any other HTTP failure, when the archive cannot be read, when it
# holds no text member, or when the member cannot be uncompressed.
sub _fetch {
    my $id = shift || '';

    warn "---> Fetching $id";

    # \A ... \z with an explicit [0-9] class: `$` would also accept a
    # trailing newline and \d matches non-ASCII digits, either of which
    # would end up inside the request URL.
    unless ($id =~ /\A[AF][0-9]{3}\z/) {
        warn "Skipping $id: Not a valid speech identifier";
        return;
    }

    my $http = HTTP::Tiny->new(
        agent => 'YoshimotoSpeechScriptFetcher/0.01 '
               . '(+https://gist.github.com/anazawa/a6166996afc74ff1c34f)',
    );

    my $res = $http->get("http://www.1101.com/yoshimoto_voice/speech/download/$id.zip");

    if ($res->{status} == 404) {
        warn "Skipping $id: $id was not found";
        return;
    }
    elsif (!$res->{success}) {
        # HTTP::Tiny reports internal errors (DNS, connect, timeout) as
        # status 599 and puts the actual error text in `content`.
        my $detail = $res->{status} == 599 ? ": $res->{content}" : '';
        die "Failed to GET $res->{url}: $res->{status} $res->{reason}$detail";
    }

    warn "Extracting $id";

    my $zip = Archive::Zip->new;

    # Read the archive straight from the response body through an in-memory
    # handle. autodie is only enabled for rename, so check open explicitly.
    open my $fh, '+<', \$res->{content}
        or die "Failed to open in-memory handle for $id: $!";

    my $read_status = $zip->readFromFileHandle($fh);
    die "Failed to read $id" unless $read_status == AZ_OK;

    # Only the first matching member is used.
    my ($txt) = $zip->membersMatching(qr/\.te?xt$/);
    die "$id.zip does not contain a text file" unless $txt;

    # In list context contents() also hands back a status code.
    my ($contents, $contents_status) = $txt->contents;
    die "Failed to uncompress $id" unless $contents_status == AZ_OK;

    return \$contents;
}
__END__
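=head1 NAME

yoshimoto-speech-scripts - Fetch Takaaki Yoshimoto speech scripts (http://www.1101.com/yoshimoto_voice/)

=head1 SYNOPSIS

  yoshimoto-speech-scripts fetch [--dir DIR] [ID ...]
  yoshimoto-speech-scripts help

=head1 OPTIONS

=over 4

=item B<-help>

Prints a brief help message and exits.

=item B<--dir> I<DIR>

Directory that receives the fetched scripts when more than one ID is
requested. Defaults to F<yoshimoto-speech-scripts>.

=back

=head1 DESCRIPTION

The C<fetch> command downloads the zip archive for each speech ID and
extracts the text file it contains. An ID is the letter C<A> or C<F>
followed by three digits, for example C<A001>. With no IDs, speeches
C<A001> through C<A183> are fetched and saved as F<ID.txt> in the output
directory. With exactly one ID, the script is printed to standard output
instead.

=cut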
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment