Last active
November 9, 2023 05:10
-
-
Save zoharbabin/9fad69a595747037340867d99822c7c7 to your computer and use it in GitHub Desktop.
Utility to manage Kaltura entry metadata, supporting retrieval, creation, and updating of custom metadata profiles and items
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Kaltura Metadata Utility Script

This script facilitates the management of custom metadata associated with entries on the Kaltura media platform.
It provides a set of operations that interface with the Kaltura API, enabling users to retrieve, update, and maintain
the consistency of metadata. The script ensures that metadata fields are structured and ordered according to the rules
defined in a Metadata Profile's XSD (XML Schema Definition).

Usage Example:
```bash
python kaltura_metadata_xml_util.py PARTNER_ID "API_ADMIN_SECRET" METADATA_PROFILE_ID "ENTRY_ID"
```

Prerequisites:
- A Kaltura account with administrative privileges.
- Access to the Kaltura admin secret and partner ID for API authentication.
- The Kaltura Python client libraries installed in your Python environment.
- Basic understanding of XML and XSD structures.

Main Features:
- Fetch and parse the XSD of a Metadata Profile to understand the structure of metadata expected.
- Generate a new XML metadata template based on the profile's XSD, which can be used as a starting point for new entries.
- Retrieve existing metadata for an entry and present it in a structured format that aligns with the profile's XSD.
- Validate and add new metadata values to an entry, ensuring that the values meet the constraints set by the profile's XSD.
- Remove empty or redundant metadata elements that do not contain any data.
- Update existing metadata entries with new or modified values, and apply these updates to the Kaltura platform.

Usage and Extension:
- To use the script, one must pass the partner ID, admin secret, metadata profile ID, and the entry ID as arguments.
- The script can be extended to handle bulk updates by looping over multiple entries.
- Additional functions can be implemented to support more complex metadata operations, such as conditional updates, or synchronization with external data sources.
- Users can extend the error handling capabilities to provide more granular feedback.
- To facilitate ease of use, consider adding an interactive command-line interface or integrating with a web-based UI.
"""
import argparse
import logging
import sys
from typing import Any, List, Optional, Tuple

from lxml import etree as ET
from KalturaClient import KalturaClient, KalturaConfiguration
from KalturaClient.Plugins.Core import KalturaSessionType, KalturaFilterPager
from KalturaClient.Plugins.Metadata import (
    KalturaMetadataFilter,
    KalturaMetadataProfile,
    KalturaMetadata,
    KalturaMetadataObjectType,
)
from KalturaClient.exceptions import KalturaException
# Configuration Constants
# Base URL assigned to KalturaConfiguration.serviceUrl when the client is built.
SERVICE_URL = "https://cdnapi-ev.kaltura.com/"
# The four SESSION_*/SCRIPT_USER_ID values below are passed to
# KalturaClient.generateSessionV2 when the Kaltura session is created.
SESSION_TYPE = KalturaSessionType.ADMIN
SESSION_DURATION = 86400  # presumably seconds (24 hours) - confirm against the Kaltura session API
SESSION_PRIVILEGES = '*,disableentitlement'
SCRIPT_USER_ID = "metadata-tester"
# XML Schema namespace and the prefix map used by every XSD lookup in this file.
XSD_NAMESPACE_URL = 'http://www.w3.org/2001/XMLSchema'
XSD_NAMESPACE = {'xsd': XSD_NAMESPACE_URL}

# Configure logging
logging.basicConfig(level=logging.INFO)
# Helper functions and classes | |
class MetadataUtils:
    """Static helpers for reading a metadata profile XSD and editing metadata XML.

    Every method works on already-parsed XML elements (or XML text) and performs
    no network calls. XSD lookups use the module-level ``XSD_NAMESPACE`` prefix map.
    """

    @staticmethod
    def parse_xsd(xsd_string: str) -> ET.Element:
        """Parse the XSD text of a metadata profile and return its root element.

        The parser is created with ``resolve_entities=False``.

        Raises:
            ET.XMLSyntaxError: if the text is not well-formed XML (logged, then re-raised).
        """
        try:
            parser = ET.XMLParser(resolve_entities=False)
            return ET.fromstring(xsd_string, parser=parser)
        except ET.XMLSyntaxError as e:
            logging.error(f"Error parsing XSD: {e}")
            raise

    @staticmethod
    def build_metadata_template(xsd_root: ET.Element) -> ET.Element:
        """Build an empty ``<metadata>`` element with one empty child per XSD field.

        Children are created in the order the ``xsd:element`` declarations appear
        in the schema. The declaration named ``metadata`` (the document root) is
        skipped, and so are declarations that carry no ``name`` attribute, since
        they cannot be turned into an element.
        """
        metadata_element = ET.Element('metadata')
        for element in xsd_root.findall(".//xsd:element", XSD_NAMESPACE):
            name = element.get('name')
            if not name or name == 'metadata':
                continue
            ET.SubElement(metadata_element, name).text = ''
        return metadata_element

    @staticmethod
    def get_metadata_template_with_values(metadata_item: "KalturaMetadata", xsd_root: ET.Element) -> ET.Element:
        """Return a schema-ordered ``<metadata>`` element filled from an existing item.

        A fresh template is built from the XSD and each field receives the text of
        the matching elements found in ``metadata_item.xml``. When the stored XML
        holds several elements for the same field, all of them are kept (as
        consecutive siblings), so repeated values are not lost when the result is
        later written back.
        """
        template_tree = MetadataUtils.build_metadata_template(xsd_root)
        item_tree = ET.fromstring(metadata_item.xml)
        handled_tags = set()
        # Iterate over a snapshot: extra siblings are inserted into template_tree below.
        for field in list(template_tree):
            if field.tag in handled_tags:
                continue
            handled_tags.add(field.tag)
            stored = item_tree.findall(f'.//{field.tag}')
            if not stored:
                continue
            field.text = stored[0].text
            anchor = list(template_tree).index(field)
            for offset, extra in enumerate(stored[1:], start=1):
                clone = ET.Element(field.tag)
                clone.text = extra.text
                template_tree.insert(anchor + offset, clone)
        return template_tree

    @staticmethod
    def pretty_print_element(element: ET.Element) -> str:
        """Serialize ``element`` to an indented text string."""
        return ET.tostring(element, pretty_print=True, encoding='unicode')

    @staticmethod
    def is_field_multi_valued(field_name: str, xsd_root: ET.Element) -> bool:
        """
        Determines whether a field is multi-valued based on the XSD.

        A field counts as multi-valued when its declaration has a ``maxOccurs``
        attribute other than ``'1'``. An undeclared field logs a warning and is
        treated as single-valued.
        """
        xsd_element = xsd_root.find(f".//xsd:element[@name='{field_name}']", namespaces=XSD_NAMESPACE)
        if xsd_element is None:
            logging.warning(f"XSD does not define field '{field_name}'.")
            return False
        return xsd_element.get('maxOccurs') not in (None, '1')

    @staticmethod
    def get_restriction_values(field_name: str, xsd_root: ET.Element) -> List[str]:
        """
        Retrieves a list of allowed values for a field based on the XSD restrictions.

        The ``xsd:simpleType`` is looked up first as a child of the field's
        declaration and then, as a fallback, as a sibling of it. An empty list
        means the schema defines no enumeration for the field.
        """
        field_type_element = xsd_root.find(
            f".//xsd:element[@name='{field_name}']/xsd:simpleType", namespaces=XSD_NAMESPACE
        )
        if field_type_element is None:
            field_type_element = xsd_root.find(
                f".//xsd:element[@name='{field_name}']/../xsd:simpleType", namespaces=XSD_NAMESPACE
            )
        if field_type_element is None:
            return []
        restriction = field_type_element.find('xsd:restriction', namespaces=XSD_NAMESPACE)
        if restriction is None:
            return []
        return [enum.get('value') for enum in restriction.findall('xsd:enumeration', namespaces=XSD_NAMESPACE)]

    @staticmethod
    def find_position_for_new_element(metadata_element: ET.Element, field_name: str, xsd_root: ET.Element) -> Optional[int]:
        """
        Finds the position where the new element should be inserted in the metadata element.

        The returned index is relative to the current children of
        ``metadata_element``: just after the last child whose field appears at or
        before ``field_name`` in the schema sequence. This keeps schema order even
        when the document is missing some fields or repeats others.

        Returns:
            The insertion index, or ``None`` when the schema has no sequence or the
            sequence does not declare ``field_name``.
        """
        # Assume the first sequence is where the metadata fields should be ordered
        sequence = xsd_root.find('.//xsd:complexType/xsd:sequence', XSD_NAMESPACE)
        if sequence is None:
            return None
        schema_order = {}
        for index, element in enumerate(sequence.findall('xsd:element', XSD_NAMESPACE)):
            schema_order.setdefault(element.get('name'), index)
        if field_name not in schema_order:
            return None
        field_rank = schema_order[field_name]
        position = 0
        for child_index, child in enumerate(metadata_element):
            child_rank = schema_order.get(child.tag)
            # Children the schema does not know about do not influence the position.
            if child_rank is not None and child_rank <= field_rank:
                position = child_index + 1
        return position

    @staticmethod
    def remove_empty_elements(parent: ET.Element, field_name: str) -> None:
        """
        Removes all empty elements with the given field name from the parent element.

        Matching elements are removed from whichever element actually contains
        them, so nested occurrences are handled as well as direct children.
        An element is empty when it has no text or only whitespace.
        """
        # Snapshot the containers first; the tree is modified while it is walked.
        for container in list(parent.iter()):
            for child in list(container):
                if child.tag == field_name and not (child.text or '').strip():
                    container.remove(child)

    @staticmethod
    def _insert_in_schema_order(metadata_element: ET.Element, field_name: str, text_value: str, xsd_root: ET.Element) -> None:
        """Create a ``field_name`` element holding ``text_value`` and place it per the schema order."""
        new_value_element = ET.Element(field_name)
        new_value_element.text = text_value
        position = MetadataUtils.find_position_for_new_element(metadata_element, field_name, xsd_root)
        if position is not None:
            metadata_element.insert(position, new_value_element)
        else:
            # If position is None, append at the end
            metadata_element.append(new_value_element)

    @staticmethod
    def add_value_to_metadata(metadata_element: ET.Element, field_name: str, value: Any, xsd_root: ET.Element) -> ET.Element:
        """
        Adds or updates a value for a specific field within the metadata structure.

        Multi-valued fields get an additional element for the value, after which
        empty elements of that field are dropped. Single-valued fields have their
        first existing element overwritten and any further elements of the same
        field removed; if none exists, a new element is inserted.

        Returns:
            ``metadata_element`` itself (modified in place).

        Raises:
            ValueError: if the schema restricts the field to an enumeration and the
                text form of ``value`` is not one of the allowed values.
        """
        multi_valued = MetadataUtils.is_field_multi_valued(field_name, xsd_root)
        restriction_values = MetadataUtils.get_restriction_values(field_name, xsd_root)
        # Validate the text that will actually be stored, not the raw object.
        text_value = str(value)
        if restriction_values and text_value not in restriction_values:
            raise ValueError(f"Value '{value}' is not allowed for field '{field_name}' based on the XSD restrictions.")

        if multi_valued:
            # For multi-valued fields, we add a new element for each value
            MetadataUtils._insert_in_schema_order(metadata_element, field_name, text_value, xsd_root)
            MetadataUtils.remove_empty_elements(metadata_element, field_name)
            return metadata_element

        existing_elements = metadata_element.findall(f".//{field_name}")
        if not existing_elements:
            # If no element found, create a new one
            MetadataUtils._insert_in_schema_order(metadata_element, field_name, text_value, xsd_root)
            return metadata_element

        # For single-valued fields, we update the existing element
        existing_elements[0].text = text_value
        if len(existing_elements) > 1:
            # Remove any additional elements from their real parent, which need
            # not be metadata_element itself.
            parent_of = {child: container for container in metadata_element.iter() for child in container}
            for duplicate in existing_elements[1:]:
                parent_of[duplicate].remove(duplicate)
        return metadata_element
class KalturaMetadataManager:
    """Wrapper around a Kaltura client for reading and writing entry metadata.

    The constructor opens a session immediately (see ``_create_client``); every
    other method issues calls through ``self.client``.
    """

    def __init__(self, partner_id: int, admin_secret: str):
        """Create the Kaltura client and start a session for ``partner_id``."""
        self.client = self._create_client(partner_id, admin_secret)

    def _create_client(self, partner_id: int, admin_secret: str) -> KalturaClient:
        """Build a client pointed at ``SERVICE_URL`` and attach a newly generated session to it."""
        config = KalturaConfiguration(partner_id)
        config.serviceUrl = SERVICE_URL
        client = KalturaClient(config)
        ks = client.generateSessionV2(
            admin_secret, SCRIPT_USER_ID, SESSION_TYPE,
            partner_id, SESSION_DURATION, SESSION_PRIVILEGES)
        client.setKs(ks)
        return client

    def fetch_metadata_profile(self, profile_id: int) -> str:
        """Return the XSD text of the metadata profile ``profile_id``.

        Raises:
            KalturaException: if the API call fails (logged, then re-raised).
        """
        try:
            metadata_profile: KalturaMetadataProfile = self.client.metadata.metadataProfile.get(profile_id)
            return metadata_profile.xsd
        except KalturaException as e:
            logging.error(f"Error fetching metadata profile: {e}")
            raise

    def check_metadata_exists(self, entry_id: str, profile_id: int) -> Tuple[bool, Optional[KalturaMetadata]]:
        """Look up the metadata item of ``entry_id`` under ``profile_id``.

        Returns:
            A ``(exists, item)`` pair: ``item`` is the first object returned by the
            list call, or ``None`` (with ``exists`` False) when nothing matched.
        """
        metadata_filter = KalturaMetadataFilter()
        metadata_filter.metadataProfileIdEqual = profile_id
        metadata_filter.metadataObjectTypeEqual = KalturaMetadataObjectType.ENTRY
        metadata_filter.objectIdEqual = entry_id
        pager = KalturaFilterPager()
        # Treat a missing object list the same as an empty one.
        result = self.client.metadata.metadata.list(metadata_filter, pager).objects or []
        return len(result) > 0, result[0] if result else None

    def create_or_get_metadata(self, entry_id: str, profile_id: int, xsd_root: ET.Element) -> ET.Element:
        """Return the entry's metadata as an element laid out per the XSD.

        Existing metadata is merged into a template built from the XSD; otherwise
        an empty template is used. In both cases ``populate_default_values`` is
        applied before returning.
        """
        metadata_exists, metadata_item = self.check_metadata_exists(entry_id, profile_id)
        if metadata_exists and metadata_item:
            metadata_xml = MetadataUtils.get_metadata_template_with_values(metadata_item, xsd_root)
        else:
            metadata_xml = MetadataUtils.build_metadata_template(xsd_root)
        self.populate_default_values(metadata_xml, xsd_root)
        return metadata_xml

    def populate_default_values(self, metadata_xml: ET.Element, xsd_root: ET.Element) -> None:
        """
        Populates default values for list types based on XSD enumeration restrictions.
        If an empty value is not allowed, the first value in the enumeration is selected.

        Only single-valued fields that have an enumeration and whose element in
        ``metadata_xml`` is present but empty are touched.
        """
        for element in xsd_root.findall(".//xsd:element", XSD_NAMESPACE):
            name = element.get('name')
            if not name or name == 'metadata':
                continue
            restriction_values = MetadataUtils.get_restriction_values(name, xsd_root)
            if not restriction_values or MetadataUtils.is_field_multi_valued(name, xsd_root):
                continue
            metadata_element = metadata_xml.find(f".//{name}")
            if metadata_element is not None and (not metadata_element.text or not metadata_element.text.strip()):
                metadata_element.text = restriction_values[0]

    def update_metadata(self, metadata_id: int, xml: str) -> KalturaMetadata:
        """Replace the XML of the metadata item ``metadata_id``.

        Raises:
            KalturaException: if the API call fails (logged, then re-raised).
        """
        try:
            return self.client.metadata.metadata.update(metadata_id, xml)
        except KalturaException as e:
            logging.error(f"Error updating metadata: {e}")
            raise

    def add_metadata(self, profile_id: int, object_type: KalturaMetadataObjectType, object_id: str, xml: str) -> KalturaMetadata:
        """Create a new metadata item for ``object_id`` under ``profile_id``.

        Raises:
            KalturaException: if the API call fails (logged, then re-raised).
        """
        try:
            return self.client.metadata.metadata.add(profile_id, object_type, object_id, xml)
        except KalturaException as e:
            logging.error(f"Error adding metadata: {e}")
            raise

    def apply_metadata_to_entry(self, entry_id: str, profile_id: int, xml: str) -> KalturaMetadata:
        """Upsert ``xml`` for the entry: update the existing item if there is one, else add a new one."""
        metadata_exists, metadata_item = self.check_metadata_exists(entry_id, profile_id)
        if metadata_exists:
            return self.update_metadata(metadata_item.id, xml)
        return self.add_metadata(profile_id, KalturaMetadataObjectType.ENTRY, entry_id, xml)
def parse_arguments(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the four positional command-line arguments of the script.

    Args:
        argv: Argument list to parse. When omitted (the default), argparse reads
            the process command line, as before.

    Returns:
        A namespace with ``partner_id`` (int), ``admin_secret`` (str),
        ``profile_id`` (int) and ``entry_id`` (str).
    """
    parser = argparse.ArgumentParser(description='Kaltura Metadata Utility Script')
    parser.add_argument('partner_id', type=int, help='Kaltura partner ID')
    parser.add_argument('admin_secret', help='Kaltura admin secret')
    parser.add_argument('profile_id', type=int, help='Metadata profile ID')
    parser.add_argument('entry_id', help='Entry ID to update metadata for')
    return parser.parse_args(argv)
def main():
    """Run the demo flow: load the profile XSD, edit a few fields, upsert, print.

    The process exits with status 1 when a value is rejected by the XSD
    restrictions (nothing is sent to Kaltura in that case) or when the upsert
    call fails.
    """
    args = parse_arguments()

    # instantiate
    kaltura_manager = KalturaMetadataManager(args.partner_id, args.admin_secret)

    # parse the schema
    xsd_string = kaltura_manager.fetch_metadata_profile(args.profile_id)
    xsd_root = MetadataUtils.parse_xsd(xsd_string)

    # create a metadata template or fetch an existing metadata item xml from the API
    metadata_xml = kaltura_manager.create_or_get_metadata(args.entry_id, args.profile_id, xsd_root)

    try:
        # make updates to specific fields
        # NOTE(review): the e-mail literals below look like redacted placeholders - replace with real values.
        metadata_xml = MetadataUtils.add_value_to_metadata(metadata_xml, 'Email', '[email protected]', xsd_root)
        metadata_xml = MetadataUtils.add_value_to_metadata(metadata_xml, 'Email', '[email protected]', xsd_root)  # will override the previous value
        metadata_xml = MetadataUtils.add_value_to_metadata(metadata_xml, 'Format', 'Go-Pro camera', xsd_root)
        metadata_xml = MetadataUtils.add_value_to_metadata(metadata_xml, 'Categories', 'Testimonials', xsd_root)
        metadata_xml = MetadataUtils.add_value_to_metadata(metadata_xml, 'Categories', 'Nature party', xsd_root)
    except ValueError as e:
        print(f"Error while updating metadata: {e}")
        # Do not push a half-edited document to the entry.
        sys.exit(1)
    print("Metadata updated successfully.")

    upsert_failed = False
    try:
        # add or update the metadata item to the entry
        kaltura_manager.apply_metadata_to_entry(args.entry_id, args.profile_id, ET.tostring(metadata_xml, encoding='unicode'))
        print(f"Metadata for entry {args.entry_id} has been upsert.")
    except KalturaException as e:
        print(f"Error while applying metadata to entry: {e}")
        upsert_failed = True

    # pretty print the xml
    pretty_xml = MetadataUtils.pretty_print_element(metadata_xml)
    print(pretty_xml)

    if upsert_failed:
        sys.exit(1)
# Run the demo flow only when the file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment