Last active
November 1, 2020 20:42
-
-
Save vaclavdekanovsky/c25c0304de1bf92d26ff28b257a0ced2 to your computer and use it in GitHub Desktop.
Update content of the unzipped XML file using ElementTree (part of python standard library)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-01T20:42:13.277587Z", | |
"start_time": "2020-11-01T20:42:13.257609Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"from xml.etree import ElementTree as ET" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We will use the standard Python ElementTree library to parse the XML files:\n", | |
"https://docs.python.org/3.9/library/xml.etree.elementtree.html#module-xml.etree.ElementTree" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-01T20:42:13.293513Z", | |
"start_time": "2020-11-01T20:42:13.279585Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"folder = \"temp\"\n", | |
"file = \"a.xml\"\n", | |
"\n", | |
"# values to which we update\n", | |
"new_prefix = \"updated\"\n", | |
"new_version = 3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-01T20:42:13.309473Z", | |
"start_time": "2020-11-01T20:42:13.295512Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Element 'data' at 0x0000020CC393C1D8>" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# path to the unzipped xml in the temp folder\n", | |
"path = os.path.join(folder, file)\n", | |
"\n", | |
"# load the xml\n", | |
"tree = ET.parse(path)\n", | |
"root = tree.getroot()\n", | |
"\n", | |
"# the wrapping tag <data> is the root element of our loaded xml\n", | |
"root" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-01T20:42:13.325429Z", | |
"start_time": "2020-11-01T20:42:13.311464Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'updated_003_20201026'" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# in the <data> find the <id>...</id> node and show its content (.text)\n", | |
"id = root.find(\"id\").text\n", | |
"id" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-01T20:42:13.341384Z", | |
"start_time": "2020-11-01T20:42:13.327424Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# split the id by \"_\" underscore\n", | |
"split_id = id.split(\"_\")\n", | |
"\n", | |
"# update the values\n", | |
"split_id[0] = new_prefix\n", | |
"split_id[1] = \"{:03d}\".format(new_version) # prefix with leading zeros up to 3 position if needed\n", | |
"\n", | |
"# update the xml content\n", | |
"root.find(\"id\").text = \"_\".join(split_id)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-01T20:42:13.357343Z", | |
"start_time": "2020-11-01T20:42:13.343379Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# any variable derived from root contains a reference to the original xml\n", | |
"# so using tree.write writes the updated contant to the path\n", | |
"tree.write(path)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.4" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment