Last active
July 31, 2018 03:41
-
-
Save audreyt/4642830 to your computer and use it in GitHub Desktop.
<教育部重編國語辭典修訂本>單字下載腳本雛形
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Prints a shell script on STDOUT: for every two-byte Big5 sequence in the
# swept range that decodes to a character, one curl pipeline that fetches the
# dictionary search result for that character from dict.revised.moe.edu.tw and
# stores the filtered HTML in "<character>.html", followed by "sleep 2".
# This script performs no network access itself; it only prints commands.
use strict;
use warnings;
use utf8;
use Encode;

# File names in the generated commands contain the decoded (non-ASCII)
# characters, so STDOUT has to be written as UTF-8.
binmode STDOUT, ':utf8';

# Decode one two-byte Big5 sequence.
#
#   $lead, $trail - the two byte values (integers).
#
# Returns the decoded character, or undef when the byte pair does not decode
# to exactly one character.  Without strict checking, decode() substitutes
# U+FFFD for anything it cannot map (possibly followed by a stray ASCII byte),
# which would yield useless requests and colliding/unsafe output file names.
sub decode_big5_char {
    my ($lead, $trail) = @_;

    # decode() may modify its input when a CHECK value is given, so hand it
    # a scratch copy.
    my $bytes = chr($lead) . chr($trail);
    my $char  = do {
        local $@;
        eval { Encode::decode('big5', $bytes, Encode::FB_CROAK()) };
    };

    return unless defined $char && length($char) == 1;
    return $char;
}

# Build the shell command line that downloads the entry for one Big5 pair.
#
#   $lead, $trail - the two byte values (integers).
#
# Returns the command text (terminated by a newline), or undef when the byte
# pair is not a decodable Big5 character.
sub download_command {
    my ($lead, $trail) = @_;

    my $char = decode_big5_char($lead, $trail);
    return unless defined $char;

    # The query carries the raw Big5 bytes percent-escaped, wrapped in
    # %5E ... %24 ("^" ... "$").
    my $escaped = sprintf('%%%02X%%%02X', $lead, $trail);

    # Generated pipeline: fetch with curl, convert Big5 to UTF-8 with piconv,
    # keep only the lines from '<table width="90%"' through '</table', and
    # strip the <span class="key"> wrappers.  "\$1" stays literal here so the
    # inner perl one-liner sees its own $1.
    return qq[curl -m 10 --retry 10 --retry-delay 10 'http://dict.revised.moe.edu.tw/cgi-bin/newDict/dict.sh?idx=dict.idx&cond=%5E${escaped}%24&pieceLen=100&fld=1&cat=&imgFont=1' | piconv -f big5 -t utf8 | perl -ne 'next unless m!^<table width="90%"! .. m!^</table!; s!<span class="key">(.+?)</span>!\$1!g; print' > "${char}.html"\n];
}

# Sweep lead bytes 0xA4-0xF9 against trail bytes 0x40-0x7E and 0xA1-0xFE.
for my $lead (0xA4 .. 0xF9) {
    for my $trail (0x40 .. 0x7E, 0xA1 .. 0xFE) {
        my $command = download_command($lead, $trail);
        next unless defined $command;    # byte pair has no Big5 mapping

        print $command;
        print "sleep 2\n";               # pause between consecutive requests
    }
}
Author
audreyt
commented
Jan 26, 2013
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment