-
-
Save no-reply/349fced9c644949e32ee to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
# THIS IS A VERY INCOMPLETE EXAMPLE; much work to do for mods.
#
# Mock mapping of MODS/XML records (provider labelled "Digital Commonwealth")
# onto an aggregation, written in the proposed Krikri::Mapper DSL.
bpl_mods_mapper = Krikri::Mapper.new
bpl_mods_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # Path first, options last: Ruby rejects a positional argument that follows
  # keyword-style arguments.
  selector "//record", type: :xpath
  # select an XPath relative to `selector` as the default parent for mappings
  default_parent xpath: "metadata/mods:mods"

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "Digital Commonwealth"
      providedLabel prefLabel # refer to a previously set value
      uri "http://jux/wuz"
    end

    # specify a particular XML element via XPath
    dataProvider xpath: "mods:location/mods:physicalLocation"
    # specify using XPath when attributes have specific values
    isShownAt xpath: "mods:location/mods:url[@usage='primary' and @access='object in context']"
    preview xpath: "mods:location/mods:url[@access='preview']"

    aggregatedCHO do
      collection do
        title xpath: "dc:collection"
        # run a function based on a parsed value
        uri generate_opaque_uri(xpath: selector + "/header/setspec")
      end

      # Derive two properties from the same set of <mods:name> elements.
      # NOTE(review): the original mock-up assigned to the symbols :creator and
      # :contributor, which is not valid Ruby. Locals are used instead and the
      # block returns a hash keyed by the mapped property names -- this return
      # contract is an assumption; confirm once multiple_map is designed.
      multiple_map(:contributor, :creator) do |record|
        creator = record.xpath("mods:name[mods:role/mods:roleTerm='creator']")
        contributor = record.xpath("mods:name")
        # No name carries an explicit creator role: promote the first name to
        # creator and keep the rest as contributors.
        # NOTE(review): assumes #xpath yields nil or an empty collection when
        # nothing matches -- confirm against the parser.
        if creator.nil? || creator.empty?
          creator = contributor[0]
          contributor = contributor[1..-1]
        end
        # Do not list the creator a second time as a contributor.
        contributor.delete(creator) if contributor.include?(creator)
        # TODO: remove <affiliation>, <displayForm>, <description>, and <role>.
        { contributor: contributor, creator: creator }
      end

      date do
        # Name tests carry the mods: prefix like every other path in this file;
        # an unprefixed XPath 1.0 name test only matches un-namespaced elements.
        providedLabel xpath: "mods:originInfo/*[self::mods:dateCreated|self::mods:dateIssued|self::mods:dateOther|self::mods:copyrightDate][@encoding='w3cdtf' and @keyDate='yes']"
      end

      description xpath: "*[self::mods:abstract|self::mods:note]"
      extent xpath: "mods:physicalDescription/mods:extent"
      format xpath: "mods:genre"

      # Build a "Type: value" label from <mods:identifier>.
      identifier do |record|
        id = record.xpath("mods:identifier")
        id_value = id.xpath("text()")
        id_type = id.xpath("@type").titleize
        # The last expression is the block's value; an explicit `return` here
        # would try to return from the enclosing method instead.
        "#{id_type}: #{id_value}"
      end

      language do
        providedLabel xpath: "mods:language/mods:languageTerm"
        uri xpath: "mods:language/mods:languageTerm[@valueURI]"
      end

      publisher xpath: "mods:originInfo/mods:place/mods:placeTerm[@type='text']|mods:originInfo/mods:publisher"

      # relation do
      #   <mods:location><mods:physicalLocation>
      #   CONCATENATED with ". "
      #   <mods:location><mods:holdingSimple><mods:copyInformation><mods:subLocation>
      #   CONCATENATED with ". "
      #   <mods:relatedItem type="host"><mods:titleInfo><mods:title>
      #   CONCATENATED with ". "
      #   <mods:relatedItem type="series"><mods:titleInfo><mods:title>
      #   (e.g., Boston Public Library. Leslie Jones photograph collection)
      # end

      rights xpath: "mods:accessCondition"

      # spatial do
      #   <mods:subject><mods:hierarchicalGeographic>
      #   <mods:subject><mods:geographic>
      #   <mods:subject><mods:cartographics><mods:coordinates>
      # end

      subject do
        # Every <mods:subject> child except the geographic ones reserved for
        # `spatial` above.
        providedLabel xpath: "mods:subject/*[not(self::mods:hierarchicalGeographic|self::mods:geographic|self::mods:cartographics)]"
      end

      temporal do
        providedLabel xpath: "mods:subject/mods:temporal"
      end

      # <titleInfo> has two subelements:
      #   <title> <nonSort> AND <subTitle>
      # (<partNumber> and <partName> are not currently supported).
      # Supported title types are:
      #   <mods:titleInfo usage="primary">,
      #   <mods:titleInfo type="alternative">,
      #   <mods:titleInfo type="translated">,
      #   <mods:titleInfo type="uniform">
      title xpath: "mods:titleInfo/mods:title/mods:nonSort|mods:titleInfo/mods:subTitle"
      type xpath: "mods:typeOfResource"
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
# This presents an interface slightly different from the other files,
# focused on the external interactions from Harvester -> OriginalRecord ->
# DPLA::MAP::Aggregation

# NOTE(review): `endpoint_url` is not defined in this mock-up; it stands in for
# the OAI endpoint the caller would supply.
harvester = Krikri::Harvesters::OAIHarvester.new(endpoint: endpoint_url)

# Register a named mapping so it can later be looked up by symbol.
Krikri::Mapper.define :uiuc_marcxml do
  input_format :xml
  # Path first, options last: Ruby rejects a positional argument that follows
  # keyword-style arguments.
  selector "//record/metadata/record", type: :xpath
  # ...
  # the rest of the internals here; extrapolate from other examples
  # ...
end

original_rec = harvester.get_records.first

# map a single record
agg = Krikri::Mapper.map(:uiuc_marcxml, original_rec)
# => #<DPLA::MAP::Aggregation:0x3f93c7bf4efc(default)>
agg.persisted? # => true

# allow #map to run on #each if passed an enumerator
# ? this is maybe a long process if called on a typical harvester
Krikri::Mapper.map(:uiuc_marcxml, harvester.get_records)
# => [#<DPLA::MAP::Aggregation:0x3f93c7bf4efc(default)>, #<DPLA::MAP::Aggregation:0x3f93c7be595c(default)>, ...]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
#
# Mock mapping of OAI qualified Dublin Core records (provider labelled
# "Minnesota Digital Library") in the proposed Krikri::Mapper DSL.
mdl_oai_qdc_mapper = Krikri::Mapper.new
mdl_oai_qdc_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # Path first, options last: Ruby rejects a positional argument that follows
  # keyword-style arguments.
  selector "//record", type: :xpath
  # select an XPath relative to `selector` as the default parent for mappings
  default_parent xpath: "metadata/oai_qdc:qualifieddc"

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "Minnesota Digital Library"
      providedLabel prefLabel # refer to a previously set value
      uri "http://foo/bar"
    end

    # specify a particular XML element via XPath
    dataProvider xpath: "dc:publisher"
    # select the last dc:identifier
    isShownAt xpath: "dc:identifier[last()]"
    # Use a function defined elsewhere (but where?)
    object contentdm_map_object(isShownAt)

    # Use an inline function passed as a block; "with" ensures
    # that `preview` will only get set if `isShownAt` is not nil
    preview with: isShownAt do
      if isShownAt.include?("cdm/ref")
        # Swap the "cdm/ref" path segment for the thumbnail utility path.
        isShownAt.gsub("cdm/ref", "utils/getthumbnail")
      elsif isShownAt.include?("u?")
        # Split "<base>u?<root>,<pointer>" into its parts and rebuild the URL
        # around thumbnail.exe.
        url_parts = isShownAt.split("u?")
        base_url = url_parts[0]
        rest = url_parts[1].split(",")
        root = rest[0]
        pointer = rest[1]
        "#{base_url}cgi-bin/thumbnail.exe?CISOROOT=#{root}&CISOPTR=#{pointer}"
      else
        # Unrecognised URL shape: leave `preview` unset.
        nil
      end
    end

    aggregatedCHO do
      collection do
        title xpath: "dc:collection"
        # run a function based on a parsed value
        uri generate_opaque_uri(xpath: selector + "/header/setspec")
      end

      contributor do
        providedLabel xpath: "dc:contributor"
      end

      creator do
        providedLabel xpath: "dc:creator"
      end

      date do
        providedLabel xpath: "dc:created"
      end

      description xpath: "dc:description"
      extent xpath: "dc:extent"
      format xpath: "dc:medium"

      genre do
        providedLabel xpath: "dcterms:format"
      end

      identifier xpath: "dc:identifier"

      language do
        providedLabel xpath: "dc:language"
      end

      publisher xpath: "dc:source"
      relation xpath: "dc:isPartOf"
      # isReplacedBy
      # replaces
      rights xpath: "dc:rights"

      spatial do
        providedLabel xpath: "dcterms:spatial"
      end

      subject do
        # split on semicolons? here or later?
        providedLabel xpath: "dc:subject"
      end

      temporal do
        providedLabel xpath: "dcterms:temporal"
      end

      title xpath: "dc:title"
      type map_type_from_literal(xpath: "dc:type")
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
#
# Variant of the UIUC mapping in which the input format names the parser and
# fields are read through `record[...]` rather than XPath.
# Instantiate the mapper before building it, as the sibling examples do.
uiuc_marcxml_mapper = Krikri::Mapper.new
uiuc_marcxml_mapper.build do
  # this specifies the parser
  # `selector`, `mapper_helper`, and other possible parser configurations would be
  # encapsulated at that level.
  input_format :marc # :xml, :json, :csv, :tsv, :modsxml, :rdf, others?

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "University of Illinois Urbana Champaign, University Library"
      providedLabel prefLabel # refer to a previously set value
      # #method_missing doing a very different thing in this
      # context. Here be dragons?
      uri "http://baz/quux"
    end

    dataProvider provider.prefLabel # #method_missing magic
    isShownAt record['856u']

    aggregatedCHO do
      contributor do
        # TODO: exclude 720 when $e is "aut" or "cre"
        # tom: figure this out!
        providedLabel traject: "700:710:711:720" do |fld|
          if fld.tag == "720" && (fld['e'] == "aut" || fld['e'] == "cre")
            nil
          else
            extract_marc(fld)
          end
        end
      end

      creator do
        providedLabel record['100'], record['110'], record['111']
      end

      date do
        providedLabel record['260']['c']
      end

      # all 5xx fields, except 538
      # tom: figure this out!
      description traject: "5.." do |fld| # note: this is a marcspecism
        extract_marc(fld) unless fld.tag == "538"
      end

      extent record['300']['ac'], record['340']['b']

      # again, this is taking marcspec+traject syntax just as a demonstration
      format traject: "LDR:007:337:338:340" do |fld|
        if fld.tag[0] == "3"
          # NOTE(review): `extract` (not `extract_marc`) is kept from the
          # original -- confirm which helper is intended for a subfield value.
          extract(fld['a'])
        elsif fld.tag == "LDR" # block parameter is `fld`; `field` was undefined
          # return LDR/6
          fld[6]
        else
          # Return 007/0
          fld[0]
        end
      end

      # genre do
      #   providedLabel #external mapping
      # end

      identifier # 001; 020 [prefix ="ISBN: "]; 022$a [prefix ="ISSN: "]; 035$a; 050$a$b [prefix ="LC call number: "]

      language do
        providedLabel # 041$a [$2 ids source, i.e. iso639-1]; OR 008 (positions 35-37)
      end

      publisher traject: extract_marc("260ab")
      relation # 760-787
      # isReplacedBy
      # replaces
      rights traject: extract_marc("506:540")

      spatial do
        providedLabel traject: extract_marc("650z:651a:662")
      end

      subject do
        providedLabel traject: extract_marc("600:61.:650:651:653:654:655:656:657:658:69.")
      end

      temporal do
        providedLabel traject: extract_marc("648")
      end

      title traject: extract_marc("245:242:240") # don't use 245c
      type traject: extract_marc("337a")
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this is all pseudocode to mock up a DSL at this point.
#
# UIUC MARCXML mapping that selects records via XPath and reads MARC fields
# through traject-style spec strings.
uiuc_marcxml_mapper = Krikri::Mapper.new
uiuc_marcxml_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # Path first, options last: Ruby rejects a positional argument that follows
  # keyword-style arguments.
  selector "//record/metadata/record", type: :xpath
  # could expose convenience methods or other tools that assist with mapping
  # e.g. marcspec: http://cklee.github.io/marc-spec/marc-spec.html
  mapper_helper :traject

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "University of Illinois Urbana Champaign, University Library"
      providedLabel prefLabel # refer to a previously set value
      uri "http://baz/quux"
    end

    # reuse the provider's label set above
    dataProvider provider.prefLabel
    isShownAt traject: extract_marc("856u")

    aggregatedCHO do
      contributor do
        # TODO: exclude 720 when $e is "aut" or "cre"
        providedLabel traject: "700:710:711:720" do |fld|
          if fld.tag == "720" && (fld['e'] == "aut" || fld['e'] == "cre")
            nil
          else
            extract_marc(fld)
          end
        end
      end

      creator do
        # Use extract_marc like every other spec-string mapping in this file.
        providedLabel traject: extract_marc("100:110:111")
      end

      date do
        providedLabel traject: extract_marc("260c")
      end

      # all 5xx fields, except 538
      description traject: "5.." do |fld| # note: this is a marcspecism
        extract_marc(fld) unless fld.tag == "538"
      end

      extent traject: extract_marc("300ac:340b")

      # again, this is taking marcspec+traject syntax just as a demonstration
      format traject: "LDR:007:337:338:340" do |fld|
        if fld.tag[0] == "3"
          # NOTE(review): `extract` (not `extract_marc`) is kept from the
          # original -- confirm which helper is intended for a subfield value.
          extract(fld['a'])
        elsif fld.tag == "LDR" # block parameter is `fld`; `field` was undefined
          # return LDR/6
          fld[6]
        else
          # Return 007/0
          fld[0]
        end
      end

      # genre do
      #   providedLabel #external mapping
      # end

      identifier # 001; 020 [prefix ="ISBN: "]; 022$a [prefix ="ISSN: "]; 035$a; 050$a$b [prefix ="LC call number: "]

      language do
        providedLabel # 041$a [$2 ids source, i.e. iso639-1]; OR 008 (positions 35-37)
      end

      publisher traject: extract_marc("260ab")
      relation # 760-787
      # isReplacedBy
      # replaces
      rights traject: extract_marc("506:540")

      spatial do
        providedLabel traject: extract_marc("650z:651a:662")
      end

      subject do
        providedLabel traject: extract_marc("600:61.:650:651:653:654:655:656:657:658:69.")
      end

      temporal do
        providedLabel traject: extract_marc("648")
      end

      title traject: extract_marc("245:242:240") # don't use 245c
      type traject: extract_marc("337a")
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment