Last active
July 16, 2024 23:31
-
-
Save ChocopieKewpie/693565b7a0773c3ce25004d7257a1d7e to your computer and use it in GitHub Desktop.
ISO19139 to Dublin Core (click)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 6 15:28:29 2023
@author: ArdoJ
"""
import xml.etree.ElementTree as ET

import click
from lxml import etree
# Namespace prefixes bound for every XPath query against the ISO 19139 record.
_ISO_NAMESPACES = {
    'gmd': 'http://www.isotc211.org/2005/gmd',
    'gco': 'http://www.isotc211.org/2005/gco',
    'gml': 'http://www.opengis.net/gml/3.2',
}

# Order in which Dublin Core elements are written to the output document.
# Keys that are absent from the metadata dictionary are skipped.
_DC_ELEMENT_ORDER = (
    'title',
    'creator',
    'subject',
    'description',
    'publisher',
    'date',
    'type',
    'format',
    'identifier',
    'source',
    'language',
    'coverage',
    'temporalCoverage',  # TODO rename this as coverage element too
    'rights',
)


def _query(doc, path):
    """Evaluate XPath *path* on *doc* with the ISO 19139 prefixes bound."""
    return doc.xpath(path, namespaces=_ISO_NAMESPACES)


def _first(values):
    """Return the first item of a non-empty sequence."""
    return values[0]


def _format_party(names, organisations, emails, combine=_first):
    """Format responsible-party values as ``label=value`` strings.

    Each argument is the (possibly empty) list of strings found for that
    field. *combine* turns a non-empty list into a single string. The result
    always has three entries (name, organization, email); an entry is ``None``
    when nothing was found for that field, so a partially filled party no
    longer raises ``KeyError``/``TypeError``.
    """
    labelled = (
        ("Name=", names),
        ("Organization=", organisations),
        ("email=", emails),
    )
    return [
        (label + combine(found)) if found else None
        for label, found in labelled
    ]


def _responsible_party(doc, party_path, combine=_first):
    """Query name, organisation and e-mail below *party_path* and format them.

    *party_path* is the XPath selecting the ``gmd:CI_ResponsibleParty``
    element(s) of interest.
    """
    names = _query(
        doc, f'{party_path}/gmd:individualName/gco:CharacterString/text()'
    )
    organisations = _query(
        doc, f'{party_path}/gmd:organisationName/gco:CharacterString/text()'
    )
    emails = _query(
        doc,
        f'{party_path}/gmd:contactInfo/gmd:CI_Contact/gmd:address'
        '/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString/text()',
    )
    return _format_party(names, organisations, emails, combine)


def _list_to_string(value):
    """Flatten a metadata value into the text of a single XML element.

    ``None`` becomes an empty string, a list is joined with ``', '`` (with
    missing items rendered as ``N/A``), and anything else is passed to
    ``str``.
    """
    if value is None:
        return ""
    if isinstance(value, list):
        return ', '.join(
            str(item) if item is not None and str(item) != "None" else "N/A"
            for item in value
        )
    return str(value)


def _build_simpledc(dcmi_metadata):
    """Build the ``simpledc`` element tree from the metadata dictionary."""
    # The prefixes are declared as plain attributes because the child tags
    # are written with a literal 'dc:' prefix rather than namespaced names.
    root = ET.Element(
        "simpledc",
        attrib={
            'xmlns:dc': 'http://purl.org/dc/elements/1.1/',
            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
        },
    )
    for key in _DC_ELEMENT_ORDER:
        if key == 'subject':
            # One <dc:subject> element per keyword instead of a joined list.
            for subject_value in dcmi_metadata['subject']:
                ET.SubElement(root, 'dc:subject').text = subject_value
        elif key in dcmi_metadata:
            element = ET.SubElement(root, f"dc:{key}")
            element.text = _list_to_string(dcmi_metadata[key])
    return root


@click.command()
@click.argument("f_input", required=True, type=click.Path(), nargs=1)
@click.argument("f_output", required=True, type=click.Path(), nargs=1)
def dcmi_19115(f_input, f_output):
    """Convert an ISO 19139 XML record to a simple Dublin Core XML file.

    F_INPUT is the path of the ISO 19139 document to read. F_OUTPUT is the
    path of the file to write, without extension: ".xml" is appended to it.
    """
    source_path = str(f_input)
    target_path = f'{str(f_output)}.xml'

    # Load the ISO 19139 XML document
    iso19139_xml = etree.parse(source_path)

    # DCMI mapping from ISO 19139
    title = _query(iso19139_xml, '//gmd:title/gco:CharacterString/text()')

    # Creator maps to the "originator" role. The publisher maps to the
    # "publisher" role and falls back to the data point of contact.
    # NOTE(review): roles are matched on the CI_RoleCode text only; records
    # that carry the role solely in @codeListValue are not matched - confirm
    # whether that should be supported.
    role_party = '//gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/text()="{}"]'
    creator = _responsible_party(iso19139_xml, role_party.format('originator'))
    publisher = _responsible_party(iso19139_xml, role_party.format('publisher'))
    poc = _responsible_party(
        iso19139_xml,
        '//gmd:MD_DataIdentification/gmd:pointOfContact/gmd:CI_ResponsibleParty',
        combine=', '.join,  # every point of contact is kept, comma separated
    )

    subject = _query(
        iso19139_xml,
        '//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()',
    )
    description = _query(iso19139_xml, '//gmd:abstract/gco:CharacterString/text()')
    date_times = _query(iso19139_xml, '//gmd:dateStamp/gco:DateTime/text()')
    dates = _query(iso19139_xml, '//gmd:dateStamp/gco:Date/text()')
    type_code = _query(
        iso19139_xml,
        '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification'
        '/gmd:spatialRepresentationType/gmd:MD_SpatialRepresentationTypeCode/@codeListValue',
    )
    formats = _query(
        iso19139_xml,
        '//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat'
        '/gmd:MD_Format/gmd:name/gco:CharacterString/text()',
    )
    identifier = _query(iso19139_xml, '//gmd:fileIdentifier/gco:CharacterString/text()')
    source = _query(
        iso19139_xml,
        '/gmd:MD_Metadata/gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage'
        '/gmd:LI_Lineage/gmd:statement/gco:CharacterString/text()',
    )
    lang_code = _query(
        iso19139_xml, '/gmd:MD_Metadata/gmd:language/gmd:LanguageCode/@codeListValue'
    )
    lang = _query(
        iso19139_xml, '/gmd:MD_Metadata/gmd:language/gco:CharacterString/text()'
    )

    bbox = '//gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
    west_bound_longitude = _query(
        iso19139_xml, f'{bbox}/gmd:westBoundLongitude/gco:Decimal/text()'
    )
    east_bound_longitude = _query(
        iso19139_xml, f'{bbox}/gmd:eastBoundLongitude/gco:Decimal/text()'
    )
    north_bound_latitude = _query(
        iso19139_xml, f'{bbox}/gmd:northBoundLatitude/gco:Decimal/text()'
    )
    south_bound_latitude = _query(
        iso19139_xml, f'{bbox}/gmd:southBoundLatitude/gco:Decimal/text()'
    )

    period = '//gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod'
    temporal_start = _query(iso19139_xml, f'{period}/gml:beginPosition/text()')
    temporal_end = _query(iso19139_xml, f'{period}/gml:endPosition/text()')

    # Missing bounds stay None (rendered as "N/A" on output); missing
    # temporal limits get an explicit placeholder.
    coverage_values = [
        ("North " + north_bound_latitude[0]) if north_bound_latitude else None,
        ("South " + south_bound_latitude[0]) if south_bound_latitude else None,
        ("East " + east_bound_longitude[0]) if east_bound_longitude else None,
        ("West " + west_bound_longitude[0]) if west_bound_longitude else None,
        ("start= " + temporal_start[0]) if temporal_start else 'start=N/A',
        ("end=" + temporal_end[0]) if temporal_end else 'end=N/A',
    ]

    rights = _query(
        iso19139_xml,
        '//gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:useLimitation'
        '/gco:CharacterString/text()',
    )

    # DCMI dictionary
    dcmi_metadata = {
        'title': title,
        'creator': creator if any(item is not None for item in creator) else 'N/A',
        'subject': subject,
        'description': description,
        # get publisher, else fall back to point of contact
        'publisher': publisher if any(item is not None for item in publisher) else poc,
        'date': date_times[0] if date_times else dates,
        'type': type_code[0] if type_code else None,
        'format': formats if formats else 'N/A',
        'identifier': identifier,
        'source': source,
        'language': lang_code if lang_code else lang,
        # TODO RELATION
        'coverage': coverage_values,
        # TODO LINEAGE
        'rights': rights,
    }

    # Serialise the Dublin Core tree and write it next to the requested name.
    xml_string = ET.tostring(
        _build_simpledc(dcmi_metadata), encoding='utf-8', method='xml'
    )
    with open(target_path, 'wb') as xml_file:
        xml_file.write(xml_string)
    print(f'Dublin Core generated @{str(f_output)}.xml!!!')
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    dcmi_19115()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Modifications...