Created
February 4, 2021 06:11
-
-
Save BradKnowles/ffb92e44ef8d8d0cfb071bdc639a2620 to your computer and use it in GitHub Desktop.
XSLT for Gutenberg RDF to JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<xsl:stylesheet version="3.0" xml:base="http://www.gutenberg.org/" | |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | |
xmlns:cc="http://web.resource.org/cc/" | |
xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/" | |
xmlns:dcterms="http://purl.org/dc/terms/" | |
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" | |
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" | |
xmlns:dcam="http://purl.org/dc/dcam/" | |
xmlns:marcrel="http://id.loc.gov/vocabulary/relators/"> | |
<!-- Plain-text output: the templates below write the JSON document by hand. -->
<xsl:output omit-xml-declaration="yes" method="text" />
<!--
  Line terminator written after each line of the generated JSON.
  It is spelled with character references on purpose: a literal line break
  typed inside an attribute value is normalized to a single space by the XML
  parser, so the variable would no longer hold a line break at all.
-->
<xsl:variable name="crlf" select="'&#13;&#10;'" />
<!--
  Entry point. Writes a single JSON object for the whole RDF document:
    bookId        last path segment of pgterms:ebook/@rdf:about
    (properties)  whatever the match templates emit via xsl:apply-templates
    contributors  one object per distinct pgterms:agent
    subjects      rdf:value of every dcterms:subject except the LCC scheme
    bookshelves   rdf:value of every distinct pgterms:bookshelf
    formats       URL, mimeType and lastModified per distinct pgterms:file
  Every string value goes through the json_string helper so that quotes,
  backslashes and line breaks in the source data cannot break the JSON.
-->
<xsl:template match="/">
  <!-- Beginning of JSON -->
  <xsl:text>{</xsl:text>
  <xsl:value-of select="$crlf" />
  <xsl:call-template name="number_property">
    <xsl:with-param name="name" select="'bookId'" />
    <xsl:with-param name="path" select="tokenize(/rdf:RDF/pgterms:ebook/@rdf:about,'/')[last()]" />
  </xsl:call-template>
  <!-- Scalar properties are produced by the match templates of this stylesheet. -->
  <xsl:apply-templates />

  <!-- contributors begin -->
  <xsl:text> "contributors" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- Grouping by string value removes agents that are listed more than once. -->
  <xsl:for-each-group select="//pgterms:agent" group-by=".">
    <xsl:text> {</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "contributorId" : </xsl:text>
    <xsl:value-of select="tokenize(@rdf:about,'/')[last()]" />
    <xsl:text>,</xsl:text>
    <xsl:value-of select="$crlf" />
    <!-- The element wrapping the agent names its role. -->
    <xsl:text> "contributorType" : </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="./parent::*/local-name()" />
    </xsl:call-template>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "name" : </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="pgterms:name" />
    </xsl:call-template>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "webpage" : </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="pgterms:webpage/@rdf:resource" />
    </xsl:call-template>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:call-template name="number_property">
      <xsl:with-param name="space" select="' '" />
      <xsl:with-param name="name" select="'birthYear'" />
      <xsl:with-param name="path" select="pgterms:birthdate" />
    </xsl:call-template>
    <xsl:call-template name="number_property">
      <xsl:with-param name="space" select="' '" />
      <xsl:with-param name="name" select="'deathYear'" />
      <xsl:with-param name="path" select="pgterms:deathdate" />
    </xsl:call-template>
    <!-- aliases begin -->
    <xsl:text> "aliases" : [</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:for-each select="pgterms:alias">
      <xsl:text> </xsl:text>
      <xsl:call-template name="json_string">
        <xsl:with-param name="value" select="." />
      </xsl:call-template>
      <xsl:if test="not(position() = last())">
        <xsl:text>,</xsl:text>
        <xsl:value-of select="$crlf" />
      </xsl:if>
    </xsl:for-each>
    <xsl:value-of select="$crlf" />
    <xsl:text> ]</xsl:text>
    <xsl:value-of select="$crlf" />
    <!-- aliases end -->
    <xsl:text> }</xsl:text>
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each-group>
  <xsl:value-of select="$crlf" />
  <xsl:text> ],</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- contributors end -->

  <!-- subjects begin -->
  <xsl:text> "subjects" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <!--
    Collect the subjects of every scheme except the Library of Congress
    Classification (LCC) code, which is extracted separately. The groups are
    flattened into one sequence first so that position()/last() below count
    across all kept subjects; counting inside each group would leave out the
    comma between the last subject of one scheme and the first of the next.
  -->
  <xsl:variable name="subjects" as="element()*">
    <xsl:for-each-group select="//dcterms:subject" group-by="rdf:Description/dcam:memberOf/@rdf:resource">
      <xsl:if test="not(current-grouping-key() = 'http://purl.org/dc/terms/LCC')">
        <xsl:sequence select="current-group()" />
      </xsl:if>
    </xsl:for-each-group>
  </xsl:variable>
  <xsl:for-each select="$subjects">
    <xsl:text> </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="rdf:Description/rdf:value" />
    </xsl:call-template>
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each>
  <xsl:value-of select="$crlf" />
  <xsl:text> ],</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- subjects end -->

  <!-- bookshelves begin -->
  <xsl:text> "bookshelves" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <xsl:for-each-group select="//pgterms:bookshelf" group-by="rdf:Description">
    <xsl:text> </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="rdf:Description/rdf:value" />
    </xsl:call-template>
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each-group>
  <xsl:value-of select="$crlf" />
  <xsl:text> ],</xsl:text>
  <xsl:value-of select="$crlf" />
  <!-- bookshelves end -->

  <!-- formats begin -->
  <xsl:text> "formats" : [</xsl:text>
  <xsl:value-of select="$crlf" />
  <xsl:for-each-group select="//dcterms:hasFormat" group-by="pgterms:file">
    <xsl:text> {</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "URL" : </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="pgterms:file/@rdf:about" />
    </xsl:call-template>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "mimeType" : </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="pgterms:file/dcterms:format/rdf:Description/rdf:value" />
    </xsl:call-template>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="$crlf" />
    <xsl:text> "lastModified" : </xsl:text>
    <xsl:call-template name="json_string">
      <xsl:with-param name="value" select="pgterms:file/dcterms:modified" />
    </xsl:call-template>
    <xsl:value-of select="$crlf" />
    <xsl:text> }</xsl:text>
    <xsl:if test="not(position() = last())">
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$crlf" />
    </xsl:if>
  </xsl:for-each-group>
  <xsl:value-of select="$crlf" />
  <xsl:text> ]</xsl:text>
  <!-- formats end -->

  <!-- End of JSON -->
  <xsl:value-of select="$crlf" />
  <xsl:text>}</xsl:text>
  <xsl:value-of select="$crlf" />
</xsl:template>

<!--
  Private helper of the root template: writes $value as a quoted JSON string.
  Several items are joined with a single space, which is what xsl:value-of
  does by default. Backslash and double quote are prefixed with a backslash;
  line feed, carriage return and tab become \n, \r and \t, because raw control
  characters are not allowed inside a JSON string.
-->
<xsl:template name="json_string">
  <xsl:param name="value" />
  <xsl:variable name="text" select="string-join(for $item in $value return string($item), ' ')" />
  <!-- Backslashes are escaped first so the escapes added afterwards stay intact. -->
  <xsl:variable name="quoted" select="replace($text, '([&quot;\\])', '\\$1')" />
  <xsl:variable name="escaped" select="replace(replace(replace($quoted, '\n', '\\n'), '\r', '\\r'), '\t', '\\t')" />
  <xsl:text>"</xsl:text>
  <xsl:value-of select="$escaped" />
  <xsl:text>"</xsl:text>
</xsl:template>
<!-- A description directly under the ebook is written as the "notes" property. -->
<xsl:template match="pgterms:ebook/dcterms:description">
  <xsl:call-template name="string_property">
    <!-- The element itself supplies the text; this is also the callee's default. -->
    <xsl:with-param name="path" select="." />
    <xsl:with-param name="name" select="'notes'" />
  </xsl:call-template>
</xsl:template>
<!-- The title element is written as the "title" property. -->
<xsl:template match="dcterms:title">
  <xsl:call-template name="string_property">
    <!-- The element itself supplies the text; this is also the callee's default. -->
    <xsl:with-param name="path" select="." />
    <xsl:with-param name="name" select="'title'" />
  </xsl:call-template>
</xsl:template>
<!-- The issue date is written, as a string, under the "releaseDate" property. -->
<xsl:template match="dcterms:issued">
  <xsl:call-template name="string_property">
    <!-- The element itself supplies the text; this is also the callee's default. -->
    <xsl:with-param name="path" select="." />
    <xsl:with-param name="name" select="'releaseDate'" />
  </xsl:call-template>
</xsl:template>
<!-- The nested rdf:value of the type element is written as the "category" property. -->
<xsl:template match="dcterms:type">
  <xsl:variable name="typeValue" select="rdf:Description/rdf:value" />
  <xsl:call-template name="string_property">
    <xsl:with-param name="name" select="'category'" />
    <xsl:with-param name="path" select="$typeValue" />
  </xsl:call-template>
</xsl:template>
<!-- The nested rdf:value of the language element is written as the "language" property. -->
<xsl:template match="dcterms:language">
  <xsl:variable name="languageValue" select="rdf:Description/rdf:value" />
  <xsl:call-template name="string_property">
    <xsl:with-param name="name" select="'language'" />
    <xsl:with-param name="path" select="$languageValue" />
  </xsl:call-template>
</xsl:template>
<!--
  Library of Congress Classification (LCC) code: matches the dcam:memberOf
  marker that points at the LCC scheme and writes the rdf:value stored next to
  it as the "lccClass" property.
  The value is looked up through the parent so it is found whether rdf:value
  is written before or after dcam:memberOf; a preceding-sibling lookup only
  covers the first of those two orders.
-->
<xsl:template match="dcam:memberOf[@rdf:resource='http://purl.org/dc/terms/LCC']">
  <xsl:call-template name="string_property">
    <xsl:with-param name="name" select="'lccClass'" />
    <xsl:with-param name="path" select="../rdf:value" />
  </xsl:call-template>
</xsl:template>
<!--
  Writes one JSON string property followed by a comma and a line terminator:
     "name" : "value",
  Parameters:
    name  property name, written as given
    path  item supplying the value; defaults to the context item
  The value is whitespace-normalized, then double quotes and backslashes are
  prefixed with a backslash so they cannot end or corrupt the JSON string.
  normalize-space() has already turned tabs and line breaks into single
  spaces, so no other character needs escaping here.
-->
<xsl:template name="string_property">
  <xsl:param name = "name" />
  <xsl:param name = "path" select="." />
  <!--
    NOTE(review): as written, this replace() turns every space into " :"
    before normalizing. That looks like a damaged line-break substitution;
    confirm the intended pattern. It is kept unchanged here.
  -->
  <xsl:variable name="normalized" select="normalize-space(replace($path, ' ', ' :'))" />
  <xsl:text> "</xsl:text>
  <xsl:value-of select="$name" />
  <xsl:text>" : "</xsl:text>
  <xsl:value-of select="replace($normalized, '([&quot;\\])', '\\$1')" />
  <xsl:text>",</xsl:text>
  <xsl:value-of select="$crlf" />
</xsl:template>
<!--
  Writes one unquoted JSON property followed by a comma and a line terminator:
    "name" : value,
  Parameters:
    space  text written before the property name; defaults to one space
    name   property name, written as given
    path   item supplying the value; defaults to the context item
  When path is empty, or holds only whitespace, the literal null is written.
  Testing the text instead of the mere existence of a node keeps an empty
  element from producing a property with no value at all.
-->
<xsl:template name="number_property">
  <xsl:param name = "space" select="' '" />
  <xsl:param name = "name" />
  <xsl:param name = "path" select="." />
  <!-- Text of the supplied item(s); an empty string means there is nothing to write. -->
  <xsl:variable name="text" select="normalize-space(string-join(for $item in $path return string($item), ' '))" />
  <xsl:value-of select="$space" />
  <xsl:text>"</xsl:text>
  <xsl:value-of select="$name" />
  <xsl:text>" : </xsl:text>
  <xsl:choose>
    <xsl:when test="$text != ''">
      <xsl:value-of select="$path" />
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>null</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:text>,</xsl:text>
  <xsl:value-of select="$crlf" />
</xsl:template>
<!-- Text nodes not handled by another template produce no output. -->
<xsl:template match="text()"></xsl:template>
<!-- <xsl:template name="ErrorOnUnmatched" match="*"> | |
<xsl:if test="name() != 'rdf:RDF'"> | |
<xsl:message terminate="no">Unexpected element: <xsl:value-of select="name()" /></xsl:message> | |
</xsl:if> | |
</xsl:template> --> | |
</xsl:stylesheet> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment