Created
April 3, 2013 09:31
-
-
Save eiro/5299751 to your computer and use it in GitHub Desktop.
ASN to MIR and ISO2709
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl | |
use Modern::Perl; | |
use YAML; | |
use Perlude; | |
use MARC::MIR; | |
sub tokenize { | |
my @t; | |
push @t,{%+} while m{ | |
\G\s* | |
(?: (?<DATA> [-A-Za-z0-9_]+) | |
| " (?<DATA> (?:\\.|[^"])+ ) " | |
| (?<COMMA> \, ) | |
| (?<START> \{ ) | |
| (?<END> \} ) | |
| (?<UNKNOWN> .+ ) ) | |
}gx; | |
@t | |
} | |
sub is_data { exists ((shift)->{DATA}) } | |
sub commit_pair { | |
my $data = shift; | |
push @$data, [splice $data,-2,2]; | |
} | |
sub get_struct; | |
sub get_struct { | |
my ( $tokens ) = shift; | |
my $data = []; | |
while ( my ($t) = $tokens->() ) { | |
if ( is_data $t ) { push @$data, $$t{DATA} } | |
elsif ( $$t{COMMA} ) { commit_pair $data } | |
elsif ( $$t{START} ) { push @$data, get_struct $tokens } | |
elsif ( $$t{END} ) { | |
commit_pair $data; | |
return $data; | |
} | |
else { die YAML::Dump $t } | |
} | |
} | |
sub get_record (_) { | |
my ( $tokens ) = shift; | |
my ( $ktoken, $value ) = fold take 2, $tokens; | |
return unless $ktoken; # end of stream | |
my $key = $$ktoken{DATA} or die; | |
if ( my $value = $$value{DATA} ) {return [ $key, $value] } | |
$$value{START} and return [ $key, get_struct $tokens ]; | |
die YAML::Dump { "not a pair " => [ $ktoken, $value ] }; | |
} | |
sub asn_records_of (_) { | |
my $tokens = shift; | |
sub { get_record $tokens // () } | |
} | |
sub asn_tokens_of { concatM {unfold tokenize} lines shift } | |
sub subfields_and_indicators (_) { | |
my $indicators = [' ',' ']; | |
my $subfields = | |
[ map { | |
my ( $k, $v ) = @$_; | |
if ( $k ~~ /ind(\d)$/ ) { | |
$$indicators[$1] = $v; | |
() | |
} else { | |
$k = substr( $k, 3); | |
# DORIS KEY ... LOL ALGO | |
# TODO: grow up, dude! | |
ref $v and $v = $$v[0][1][0][1]; | |
[ $k, $v ]; | |
} | |
} @{(shift)} ]; | |
$subfields, $indicators; | |
} | |
sub mir_fields (_) { | |
map { | |
if ( ref $$_[1] ) { | |
$_ = | |
[ $$_[0] | |
, subfields_and_indicators $$_[1] ] | |
} | |
$_ | |
} @{(shift)} | |
} | |
sub to_mir (_) { | |
my ( $label, $ast ) = @{(shift)}; | |
[ $$label[1] | |
, [mir_fields $$ast[1]] ] | |
} | |
sub asn_hashified (_) { | |
my $asn = {}; | |
my ( $key, $ast ) = @{(shift)}; | |
for ( @$ast ) { | |
my ( $k, $v ) = @$_; | |
die "duplicated $k" if exists $$asn{$k}; | |
$$asn{$k} = $v; | |
} | |
$asn; | |
} | |
now { | |
for ( $$_{T_DORIS} ) { | |
# say YAML::Dump $_ = to_mir; | |
say to_iso2709 to_mir; | |
} | |
} | |
apply {asn_hashified} | |
asn_records_of asn_tokens_of shift; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment