ChocopieKewpie · July 16, 2024 23:31 · ChocopieKewpie · Jul 16, 2024
diff --git a/139_to_DC.py b/139_to_DC.py
 # -*- coding: utf-8 -*-
 """
 Created on Wed Sep  6 15:28:29 2023

 @author: ArdoJ
 """

 import click
 from lxml import etree

 # Namespace prefixes used by every XPath expression evaluated below.
 _ISO_NAMESPACES = {
     'gmd': 'http://www.isotc211.org/2005/gmd',
     'gco': 'http://www.isotc211.org/2005/gco',
     'gml': 'http://www.opengis.net/gml/3.2',
 }

 # XPath prefixes selecting the responsible parties mapped to DC elements.
 _ORIGINATOR = '//gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/text()="originator"]'
 _PUBLISHER = '//gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/text()="publisher"]'
 _POINT_OF_CONTACT = '//gmd:MD_DataIdentification/gmd:pointOfContact/gmd:CI_ResponsibleParty'

 # XPath suffixes, relative to a gmd:CI_ResponsibleParty element.
 _PARTY_NAME = '/gmd:individualName/gco:CharacterString/text()'
 _PARTY_ORGANISATION = '/gmd:organisationName/gco:CharacterString/text()'
 _PARTY_EMAIL = (
     '/gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address'
     '/gmd:electronicMailAddress/gco:CharacterString/text()'
 )

 # XPath prefixes for the spatial and temporal extent.
 _BOUNDING_BOX = '//gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
 _TIME_PERIOD = '//gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod'


 def _labelled(label, value):
     """Return ``label + value``, or None when *value* is None."""
     return None if value is None else label + value


 def _party_entries(tree, party_xpath, join_all=False):
     """Build the ``[Name=..., Organization=..., email=...]`` list for a party.

     Each entry is None when the corresponding field is absent from the
     document, so a party described by only some of the three fields no
     longer raises KeyError/TypeError.  With *join_all* every match is
     joined with ``', '``; otherwise only the first match is used.
     """
     def field(suffix):
         hits = tree.xpath(party_xpath + suffix, namespaces=_ISO_NAMESPACES)
         if not hits:
             return None
         return ', '.join(hits) if join_all else hits[0]

     return [
         _labelled("Name=", field(_PARTY_NAME)),
         _labelled("Organization=", field(_PARTY_ORGANISATION)),
         _labelled("email=", field(_PARTY_EMAIL)),
     ]


 def _list_to_string(value):
     """Flatten a metadata value into the text of one Dublin Core element.

     None becomes an empty string, lists are joined with ``', '`` (missing
     items rendered as ``"N/A"``), anything else is passed through ``str``.
     """
     if value is None:
         return ""
     if isinstance(value, list):
         return ', '.join(
             "N/A" if item is None or str(item) == "None" else str(item)
             for item in value
         )
     return str(value)


 @click.command()
 @click.argument("f_input", required=True, type=click.Path(), nargs=1)
 @click.argument("f_output", required=True, type=click.Path(), nargs=1)
 def dcmi_19115(f_input, f_output):
     """Convert an ISO 19139 metadata record into a simple Dublin Core XML file.

     F_INPUT is the path of the ISO 19139 XML document to read.  F_OUTPUT is
     the output path without extension; the result is written to
     ``<F_OUTPUT>.xml``.
     """
     # Imported locally, as in the original, so the function is self-contained.
     import xml.etree.ElementTree as ET

     fileout = str(f_output)

     # Load the ISO 19139 XML document
     tree = etree.parse(str(f_input))

     def query(expression):
         """Evaluate *expression* against the document and return all matches."""
         return tree.xpath(expression, namespaces=_ISO_NAMESPACES)

     def first(expression):
         """Return the first match of *expression*, or None when there is none."""
         hits = query(expression)
         return hits[0] if hits else None

     # Creator is the party with role "originator"; publisher is the party with
     # role "publisher".
     creator = _party_entries(tree, _ORIGINATOR)
     publisher = _party_entries(tree, _PUBLISHER)
     # Which role the publisher should really map to is still open; for now a
     # record without a "publisher" party falls back to the data's point of
     # contact (all contacts joined into one entry per field).
     poc = _party_entries(tree, _POINT_OF_CONTACT, join_all=True)

     # Prefer the full timestamp; otherwise use whatever plain dates exist.
     date_stamp = first('//gmd:dateStamp/gco:DateTime/text()')
     if date_stamp is None:
         date_stamp = query('//gmd:dateStamp/gco:Date/text()')

     temporal_start = first(_TIME_PERIOD + '/gml:beginPosition/text()')
     temporal_end = first(_TIME_PERIOD + '/gml:endPosition/text()')
     coverage_values = [
         _labelled("North ", first(_BOUNDING_BOX + '/gmd:northBoundLatitude/gco:Decimal/text()')),
         _labelled("South ", first(_BOUNDING_BOX + '/gmd:southBoundLatitude/gco:Decimal/text()')),
         _labelled("East ", first(_BOUNDING_BOX + '/gmd:eastBoundLongitude/gco:Decimal/text()')),
         _labelled("West ", first(_BOUNDING_BOX + '/gmd:westBoundLongitude/gco:Decimal/text()')),
         # The "start= " prefix (with its space) is kept byte-for-byte so the
         # generated output does not change.
         'start=N/A' if temporal_start is None else "start= " + temporal_start,
         'end=N/A' if temporal_end is None else "end=" + temporal_end,
     ]

     # DCMI dictionary: mapping from ISO 19139 to the Dublin Core elements
     dcmi_metadata = {
         'title': query('//gmd:title/gco:CharacterString/text()'),
         'creator': creator if any(item is not None for item in creator) else 'N/A',
         'subject': query(
             '//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()'
         ),
         'description': query('//gmd:abstract/gco:CharacterString/text()'),
         # get publisher, else fall back to point of contact
         'publisher': publisher if any(item is not None for item in publisher) else poc,
         'date': date_stamp,
         'type': first(
             '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification'
             '/gmd:spatialRepresentationType/gmd:MD_SpatialRepresentationTypeCode/@codeListValue'
         ),
         'format': query(
             '//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat'
             '/gmd:MD_Format/gmd:name/gco:CharacterString/text()'
         ) or 'N/A',
         'identifier': query('//gmd:fileIdentifier/gco:CharacterString/text()'),
         'source': query(
             '/gmd:MD_Metadata/gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage'
             '/gmd:LI_Lineage/gmd:statement/gco:CharacterString/text()'
         ),
         # Code-list value when present, free-text language otherwise.
         'language': (
             query('/gmd:MD_Metadata/gmd:language/gmd:LanguageCode/@codeListValue')
             or query('/gmd:MD_Metadata/gmd:language/gco:CharacterString/text()')
         ),
         # TODO RELATION
         'coverage': coverage_values,
         # TODO LINEAGE
         'rights': query(
             '//gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:useLimitation'
             '/gco:CharacterString/text()'
         ),
     }

     # XML CREATION BEGINS
     # The namespace declarations are written as plain attributes and the
     # children use literal "dc:" prefixed tags, exactly as before.
     root = ET.Element(
         "simpledc",
         attrib={
             'xmlns:dc': 'http://purl.org/dc/elements/1.1/',
             'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
         }
     )

     element_order = [
         'title',
         'creator',
         'subject',
         'description',
         'publisher',
         'date',
         'type',
         'format',
         'identifier',
         'source',
         'language',
         'coverage',
         'temporalCoverage',  # TODO rename this as coverage element too
         'rights',
     ]

     # Create elements based on the dcmi_metadata dictionary in the desired order
     for key in element_order:
         if key not in dcmi_metadata:
             # e.g. 'temporalCoverage', which has no mapping yet
             continue
         value = dcmi_metadata[key]
         if key == 'subject':
             # One <dc:subject> element per keyword
             for keyword in value:
                 ET.SubElement(root, 'dc:subject').text = str(keyword)
         else:
             ET.SubElement(root, f"dc:{key}").text = _list_to_string(value)

     # Serialise the tree and write it to the output file
     xml_string = ET.tostring(root, encoding='utf-8', method='xml')
     with open(f'{fileout}.xml', 'wb') as xml_file:
         xml_file.write(xml_string)

     print(f'Dublin Core generated @{fileout}.xml!!!')

 # Run the click command only when this file is executed as a script.
 if __name__ == "__main__":
     dcmi_19115()
	# -*- coding: utf-8 -*-
	"""
	Created on Wed Sep 6 15:28:29 2023

	@author: ArdoJ
	"""

	import click
	from lxml import etree

	# Namespace prefixes used by every XPath expression evaluated below.
	_ISO_NAMESPACES = {
	    'gmd': 'http://www.isotc211.org/2005/gmd',
	    'gco': 'http://www.isotc211.org/2005/gco',
	    'gml': 'http://www.opengis.net/gml/3.2',
	}

	# XPath prefixes selecting the responsible parties mapped to DC elements.
	_ORIGINATOR = '//gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/text()="originator"]'
	_PUBLISHER = '//gmd:CI_ResponsibleParty[gmd:role/gmd:CI_RoleCode/text()="publisher"]'
	_POINT_OF_CONTACT = '//gmd:MD_DataIdentification/gmd:pointOfContact/gmd:CI_ResponsibleParty'

	# XPath suffixes, relative to a gmd:CI_ResponsibleParty element.
	_PARTY_NAME = '/gmd:individualName/gco:CharacterString/text()'
	_PARTY_ORGANISATION = '/gmd:organisationName/gco:CharacterString/text()'
	_PARTY_EMAIL = (
	    '/gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address'
	    '/gmd:electronicMailAddress/gco:CharacterString/text()'
	)

	# XPath prefixes for the spatial and temporal extent.
	_BOUNDING_BOX = '//gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
	_TIME_PERIOD = '//gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod'


	def _labelled(label, value):
	    """Return ``label + value``, or None when *value* is None."""
	    return None if value is None else label + value


	def _party_entries(tree, party_xpath, join_all=False):
	    """Build the ``[Name=..., Organization=..., email=...]`` list for a party.

	    Each entry is None when the corresponding field is absent from the
	    document, so a party described by only some of the three fields no
	    longer raises KeyError/TypeError.  With *join_all* every match is
	    joined with ``', '``; otherwise only the first match is used.
	    """
	    def field(suffix):
	        hits = tree.xpath(party_xpath + suffix, namespaces=_ISO_NAMESPACES)
	        if not hits:
	            return None
	        return ', '.join(hits) if join_all else hits[0]

	    return [
	        _labelled("Name=", field(_PARTY_NAME)),
	        _labelled("Organization=", field(_PARTY_ORGANISATION)),
	        _labelled("email=", field(_PARTY_EMAIL)),
	    ]


	def _list_to_string(value):
	    """Flatten a metadata value into the text of one Dublin Core element.

	    None becomes an empty string, lists are joined with ``', '`` (missing
	    items rendered as ``"N/A"``), anything else is passed through ``str``.
	    """
	    if value is None:
	        return ""
	    if isinstance(value, list):
	        return ', '.join(
	            "N/A" if item is None or str(item) == "None" else str(item)
	            for item in value
	        )
	    return str(value)


	@click.command()
	@click.argument("f_input", required=True, type=click.Path(), nargs=1)
	@click.argument("f_output", required=True, type=click.Path(), nargs=1)
	def dcmi_19115(f_input, f_output):
	    """Convert an ISO 19139 metadata record into a simple Dublin Core XML file.

	    F_INPUT is the path of the ISO 19139 XML document to read.  F_OUTPUT is
	    the output path without extension; the result is written to
	    ``<F_OUTPUT>.xml``.
	    """
	    # Imported locally, as in the original, so the function is self-contained.
	    import xml.etree.ElementTree as ET

	    fileout = str(f_output)

	    # Load the ISO 19139 XML document
	    tree = etree.parse(str(f_input))

	    def query(expression):
	        """Evaluate *expression* against the document and return all matches."""
	        return tree.xpath(expression, namespaces=_ISO_NAMESPACES)

	    def first(expression):
	        """Return the first match of *expression*, or None when there is none."""
	        hits = query(expression)
	        return hits[0] if hits else None

	    # Creator is the party with role "originator"; publisher is the party with
	    # role "publisher".
	    creator = _party_entries(tree, _ORIGINATOR)
	    publisher = _party_entries(tree, _PUBLISHER)
	    # Which role the publisher should really map to is still open; for now a
	    # record without a "publisher" party falls back to the data's point of
	    # contact (all contacts joined into one entry per field).
	    poc = _party_entries(tree, _POINT_OF_CONTACT, join_all=True)

	    # Prefer the full timestamp; otherwise use whatever plain dates exist.
	    date_stamp = first('//gmd:dateStamp/gco:DateTime/text()')
	    if date_stamp is None:
	        date_stamp = query('//gmd:dateStamp/gco:Date/text()')

	    temporal_start = first(_TIME_PERIOD + '/gml:beginPosition/text()')
	    temporal_end = first(_TIME_PERIOD + '/gml:endPosition/text()')
	    coverage_values = [
	        _labelled("North ", first(_BOUNDING_BOX + '/gmd:northBoundLatitude/gco:Decimal/text()')),
	        _labelled("South ", first(_BOUNDING_BOX + '/gmd:southBoundLatitude/gco:Decimal/text()')),
	        _labelled("East ", first(_BOUNDING_BOX + '/gmd:eastBoundLongitude/gco:Decimal/text()')),
	        _labelled("West ", first(_BOUNDING_BOX + '/gmd:westBoundLongitude/gco:Decimal/text()')),
	        # The "start= " prefix (with its space) is kept byte-for-byte so the
	        # generated output does not change.
	        'start=N/A' if temporal_start is None else "start= " + temporal_start,
	        'end=N/A' if temporal_end is None else "end=" + temporal_end,
	    ]

	    # DCMI dictionary: mapping from ISO 19139 to the Dublin Core elements
	    dcmi_metadata = {
	        'title': query('//gmd:title/gco:CharacterString/text()'),
	        'creator': creator if any(item is not None for item in creator) else 'N/A',
	        'subject': query(
	            '//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()'
	        ),
	        'description': query('//gmd:abstract/gco:CharacterString/text()'),
	        # get publisher, else fall back to point of contact
	        'publisher': publisher if any(item is not None for item in publisher) else poc,
	        'date': date_stamp,
	        'type': first(
	            '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification'
	            '/gmd:spatialRepresentationType/gmd:MD_SpatialRepresentationTypeCode/@codeListValue'
	        ),
	        'format': query(
	            '//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat'
	            '/gmd:MD_Format/gmd:name/gco:CharacterString/text()'
	        ) or 'N/A',
	        'identifier': query('//gmd:fileIdentifier/gco:CharacterString/text()'),
	        'source': query(
	            '/gmd:MD_Metadata/gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage'
	            '/gmd:LI_Lineage/gmd:statement/gco:CharacterString/text()'
	        ),
	        # Code-list value when present, free-text language otherwise.
	        'language': (
	            query('/gmd:MD_Metadata/gmd:language/gmd:LanguageCode/@codeListValue')
	            or query('/gmd:MD_Metadata/gmd:language/gco:CharacterString/text()')
	        ),
	        # TODO RELATION
	        'coverage': coverage_values,
	        # TODO LINEAGE
	        'rights': query(
	            '//gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:useLimitation'
	            '/gco:CharacterString/text()'
	        ),
	    }

	    # XML CREATION BEGINS
	    # The namespace declarations are written as plain attributes and the
	    # children use literal "dc:" prefixed tags, exactly as before.
	    root = ET.Element(
	        "simpledc",
	        attrib={
	            'xmlns:dc': 'http://purl.org/dc/elements/1.1/',
	            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
	        }
	    )

	    element_order = [
	        'title',
	        'creator',
	        'subject',
	        'description',
	        'publisher',
	        'date',
	        'type',
	        'format',
	        'identifier',
	        'source',
	        'language',
	        'coverage',
	        'temporalCoverage',  # TODO rename this as coverage element too
	        'rights',
	    ]

	    # Create elements based on the dcmi_metadata dictionary in the desired order
	    for key in element_order:
	        if key not in dcmi_metadata:
	            # e.g. 'temporalCoverage', which has no mapping yet
	            continue
	        value = dcmi_metadata[key]
	        if key == 'subject':
	            # One <dc:subject> element per keyword
	            for keyword in value:
	                ET.SubElement(root, 'dc:subject').text = str(keyword)
	        else:
	            ET.SubElement(root, f"dc:{key}").text = _list_to_string(value)

	    # Serialise the tree and write it to the output file
	    xml_string = ET.tostring(root, encoding='utf-8', method='xml')
	    with open(f'{fileout}.xml', 'wb') as xml_file:
	        xml_file.write(xml_string)

	    print(f'Dublin Core generated @{fileout}.xml!!!')

	# Run the click command only when this file is executed as a script.
	if __name__ == "__main__":
	    dcmi_19115()