Last active
August 29, 2015 14:08
-
-
Save anarchivist/3c6acd513f744ebbcb19 to your computer and use it in GitHub Desktop.
Krikri mapper DSL mockup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
# THIS IS A VERY INCOMPLETE EXAMPLE; much work to do for mods.
#
# Mockup of a Krikri mapping for MODS records: every node matched by
# `selector` is mapped onto an aggregation and its aggregatedCHO.
bpl_mods_mapper = Krikri::Mapper.new
bpl_mods_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # Positional argument first: Ruby does not accept a positional argument
  # after keyword arguments.
  selector "//record", type: :xpath
  # select an XPath relative to `selector` as the default parent for mappings
  default_parent xpath: "metadata/mods:mods"

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "Digital Commonwealth"
      providedLabel prefLabel # refer to a previously set value
      uri "http://jux/wuz"
    end

    # specify a particular XML element via XPath
    dataProvider xpath: "mods:location/mods:physicalLocation"
    # specify using XPath when attributes have specific values
    isShownAt xpath: "mods:location/mods:url[@usage='primary' and @access='object in context']"
    preview xpath: "mods:location/mods:url[@access='preview']"

    aggregatedCHO do
      collection do
        # NOTE(review): `dc:collection` is a Dublin Core name inside a MODS
        # mapping -- confirm the intended source element.
        title xpath: "dc:collection"
        # run a function based on a parsed value
        uri generate_opaque_uri(xpath: selector + "/header/setspec")
      end

      multiple_map(:contributor, :creator) do |record|
        # Plain locals are used because Ruby cannot assign to symbol
        # literals such as `:creator`.
        creator = record.xpath("mods:name[mods:role/mods:roleTerm='creator']")
        contributor = record.xpath("mods:name")
        # Fall back to the first name when no explicit creator was found.
        # NOTE(review): if `xpath` returns an empty set rather than nil this
        # branch never runs -- confirm against the record API.
        if creator.nil?
          creator = contributor[0]
          contributor = contributor[1..-1]
        end
        contributor.delete(creator) if contributor.include?(creator)
        # remove <affiliation>, <displayForm>, <description>, and <role>.

        # NOTE(review): the mockup does not define how `multiple_map`
        # receives the values; a hash keyed by field name is an assumption.
        { contributor: contributor, creator: creator }
      end

      date do
        # The `self::` node tests carry the `mods:` prefix like every other
        # step; unprefixed names only match elements in no namespace.
        providedLabel xpath: "mods:originInfo/*[self::mods:dateCreated|self::mods:dateIssued|self::mods:dateOther|self::mods:copyrightDate][@encoding='w3cdtf' and @keyDate='yes']"
      end

      description xpath: "*[self::mods:abstract|self::mods:note]"
      extent xpath: "mods:physicalDescription/mods:extent"
      format xpath: "mods:genre"

      identifier do |record|
        id = record.xpath("mods:identifier")
        id_value = id.xpath("text()")
        id_type = id.xpath("@type").titleize
        # The block's value is its last expression; an explicit `return`
        # inside a block would try to return from the enclosing method.
        "#{id_type}: #{id_value}"
      end

      language do
        providedLabel xpath: "mods:language/mods:languageTerm"
        uri xpath: "mods:language/mods:languageTerm[@valueURI]"
      end

      publisher xpath: "mods:originInfo/mods:place/mods:placeTerm[@type='text']|mods:originInfo/mods:publisher"

      # relation do
      #   <mods:location><mods:physicalLocation>
      #   CONCATENATED with ". "
      #   <mods:location><mods:holdingSimple><mods:copyInformation><mods:subLocation>
      #   CONCATENATED with ". "
      #   <mods:relatedItem type="host"><mods:titleInfo><mods:title>
      #   CONCATENATED with ". "
      #   <mods:relatedItem type="series"><mods:titleInfo><mods:title>
      #   (e.g., Boston Public Library. Leslie Jones photograph collection)
      # end

      rights xpath: "mods:accessCondition"

      # spatial do
      #   <mods:subject><mods:hierarchicalGeographic>
      #   <mods:subject><mods:geographic>
      #   <mods:subject><mods:cartographics><mods:coordinates>
      # end

      subject do
        # Everything under <mods:subject> except the geographic elements,
        # which are reserved for `spatial` above.
        providedLabel xpath: "mods:subject/*[not(self::mods:hierarchicalGeographic|self::mods:geographic|self::mods:cartographics)]"
      end

      temporal do
        providedLabel xpath: "mods:subject/mods:temporal"
      end

      # <titleInfo> has these subelements:
      #   <title>, <nonSort> AND <subTitle>
      #   (<partNumber> and <partName> are not currently supported).
      # Supported title types are:
      #   <mods:titleInfo usage="primary">,
      #   <mods:titleInfo type="alternative">,
      #   <mods:titleInfo type="translated">,
      #   <mods:titleInfo type="uniform">
      # NOTE(review): this expression looks for <nonSort> beneath <title> and
      # never selects <title> itself, which disagrees with the list above --
      # confirm the intended paths.
      title xpath: "mods:titleInfo/mods:title/mods:nonSort|mods:titleInfo/mods:subTitle"
      type xpath: "mods:typeOfResource"
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
#
# Mockup of a Krikri mapping for OAI qualified Dublin Core records: every
# node matched by `selector` is mapped onto an aggregation and its
# aggregatedCHO.
mdl_oai_qdc_mapper = Krikri::Mapper.new
mdl_oai_qdc_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # Positional argument first: Ruby does not accept a positional argument
  # after keyword arguments.
  selector "//record", type: :xpath
  # select an XPath relative to `selector` as the default parent for mappings
  default_parent xpath: "metadata/oai_qdc:qualifieddc"

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "Minnesota Digital Library"
      providedLabel prefLabel # refer to a previously set value
      uri "http://foo/bar"
    end

    # specify a particular XML element via XPath
    dataProvider xpath: "dc:publisher"
    # select the last dc:identifier
    isShownAt xpath: "dc:identifier[last()]"
    # Use a function defined elsewhere (but where?)
    object contentdm_map_object(isShownAt)

    # Use an inline function passed as a block; "with" ensures
    # that `preview` will only get set if `isShownAt` is not nil
    preview with: isShownAt do
      if isShownAt.include?("cdm/ref")
        # Swap the reference path segment for the thumbnail one.
        isShownAt.gsub("cdm/ref", "utils/getthumbnail")
      elsif isShownAt.include?("u?")
        # URLs of the form "<base>u?<root>,<pointer>": rebuild them as a
        # thumbnail.exe request from the same parts.
        base_url, query = isShownAt.split("u?")
        root, pointer = query.split(",")
        "#{base_url}cgi-bin/thumbnail.exe?CISOROOT=#{root}&CISOPTR=#{pointer}"
      end
      # Any other URL shape falls through and the block evaluates to nil.
    end

    aggregatedCHO do
      collection do
        title xpath: "dc:collection"
        # run a function based on a parsed value
        uri generate_opaque_uri(xpath: selector + "/header/setspec")
      end

      contributor do
        providedLabel xpath: "dc:contributor"
      end

      creator do
        providedLabel xpath: "dc:creator"
      end

      date do
        providedLabel xpath: "dc:created"
      end

      description xpath: "dc:description"
      extent xpath: "dc:extent"
      format xpath: "dc:medium"

      genre do
        providedLabel xpath: "dcterms:format"
      end

      identifier xpath: "dc:identifier"

      language do
        providedLabel xpath: "dc:language"
      end

      publisher xpath: "dc:source"
      relation xpath: "dc:isPartOf"
      # isReplacedBy
      # replaces
      rights xpath: "dc:rights"

      spatial do
        providedLabel xpath: "dcterms:spatial"
      end

      subject do
        # split on semicolons? here or later?
        providedLabel xpath: "dc:subject"
      end

      temporal do
        providedLabel xpath: "dcterms:temporal"
      end

      title xpath: "dc:title"
      type map_type_from_literal(xpath: "dc:type")
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
#
# Mockup of a Krikri mapping for MARCXML records: every node matched by
# `selector` is mapped onto an aggregation and its aggregatedCHO, with field
# access written in a marcspec/traject-like notation.
uiuc_marcxml_mapper = Krikri::Mapper.new
uiuc_marcxml_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # Positional argument first: Ruby does not accept a positional argument
  # after keyword arguments.
  selector "//record/metadata/record", type: :xpath
  # could expose convenience methods or other tools that assist with mapping
  # e.g. marcspec: http://cklee.github.io/marc-spec/marc-spec.html
  mapper_helper :traject

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "University of Illinois Urbana Champaign, University Library"
      providedLabel prefLabel # refer to a previously set value
      uri "http://baz/quux"
    end

    # reuse the label already set on the provider
    dataProvider provider.prefLabel
    isShownAt traject: extract_marc("856u")

    aggregatedCHO do
      contributor do
        # 700/710/711/720, skipping a 720 whose $e is "aut" or "cre"
        providedLabel traject: "700:710:711:720" do |fld|
          if fld.tag == "720" && (fld['e'] == "aut" || fld['e'] == "cre")
            nil
          else
            extract_marc(fld)
          end
        end
      end

      creator do
        # `extract_marc`, matching every other spec-string extraction here
        providedLabel traject: extract_marc("100:110:111")
      end

      date do
        providedLabel traject: extract_marc("260c")
      end

      # all 5xx fields, except 538
      description traject: "5.." do |fld| # note: this is a marcspecism
        extract_marc(fld) unless fld.tag == "538"
      end

      extent traject: extract_marc("300ac:340b")

      # again, this is taking marcspec+traject syntax just as a demonstration
      format traject: "LDR:007:337:338:340" do |fld|
        if fld.tag[0] == "3"
          # NOTE(review): `extract` is not used anywhere else in this mockup;
          # confirm whether a helper is intended or `fld['a']` alone suffices.
          extract(fld['a'])
        elsif fld.tag == "LDR"
          # return LDR/6
          fld[6]
        else
          # Return 007/0
          fld[0]
        end
      end

      # genre do
      #   providedLabel # external mapping
      # end

      identifier # 001; 020 [prefix ="ISBN: "]; 022$a [prefix ="ISSN: "]; 035$a; 050$a$b [prefix ="LC call number: "]

      language do
        providedLabel # 041$a [$2 ids source, i.e. ISO 639-1]; OR 008 (positions 35-37)
      end

      publisher traject: extract_marc("260ab")
      relation # 760-787
      # isReplacedBy
      # replaces
      rights traject: extract_marc("506:540")

      spatial do
        providedLabel traject: extract_marc("650z:651a:662")
      end

      subject do
        providedLabel traject: extract_marc("600:61.:650:651:653:654:655:656:657:658:69.")
      end

      temporal do
        providedLabel traject: extract_marc("648")
      end

      title traject: extract_marc("245:242:240") # don't use 245c
      type traject: extract_marc("337a")
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment