Last active
December 23, 2020 10:28
-
-
Save neilhawkins/120cf80b1a2f33ef85c2ce51580723a1 to your computer and use it in GitHub Desktop.
Initial transforms needed to clean up UK Electoral Commission data exports
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"op": "core/text-transform", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Value", | |
"expression": "grel:value.replace(/[,£]/,'').toNumber()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10, | |
"description": "Text transform on cells in column Value using expression grel:value.replace(/[,£]/,'').toNumber()" | |
}, | |
{ | |
"op": "core/text-transform", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "AcceptedDate", | |
"expression": "value.toDate()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10, | |
"description": "Text transform on cells in column AcceptedDate using expression value.toDate()" | |
}, | |
{ | |
"op": "core/text-transform", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "ReceivedDate", | |
"expression": "value.toDate()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10, | |
"description": "Text transform on cells in column ReceivedDate using expression value.toDate()" | |
}, | |
{ | |
"op": "core/text-transform", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "ReportedDate", | |
"expression": "value.toDate()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10, | |
"description": "Text transform on cells in column ReportedDate using expression value.toDate()" | |
}, | |
{ | |
"op": "core/column-addition", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "RegulatedDoneeType", | |
"expression": "value", | |
"columnName": "RegulatedDoneeType", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "MP - Member of Parliament", | |
"l": "MP - Member of Parliament" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"baseColumnName": "RegulatedEntityName", | |
"expression": "grel:value", | |
"onError": "set-to-blank", | |
"newColumnName": "DupeName", | |
"columnInsertIndex": 2, | |
"description": "Create column DupeName at index 2 based on column RegulatedEntityName using expression grel:value" | |
}, | |
{ | |
"op": "core/column-addition", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "RegulatedDoneeType", | |
"expression": "value", | |
"columnName": "RegulatedDoneeType", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "MP - Member of Parliament", | |
"l": "MP - Member of Parliament" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"baseColumnName": "DupeName", | |
"expression": "grel:value.split(/(Mr|Mrs|Miss|Ms|Dr|Admiral|Air Comm|Ambassador|Baron|Baroness|Brig & Mrs|Brig Gen|Brigadier|Brother|Canon|Capt|Chief|Cllr|Col|Commander|Commander & Mrs|Consul|Consul General|Count|Countess|Countess of|Cpl|Dame|Deputy|Dr & Mrs|Drs|Duchess|Duke|Earl|Father|General|Gräfin|HE|HMA|Her Grace|His Excellency|Ing|Judge|Justice|Lady|Lic|Llc|Lord|Lord & Lady|Lt|Lt Col|Lt Cpl|Madam|Madame|Major|Major General|Marchioness|Marquis|Minister|Mme|Mr & Dr|Mr & Mrs|Mr & Ms|Prince|Princess|Prof|Prof & Dr|Prof & Mrs|Prof & Rev|Prof Dame|Prof Dr|Pvt|Rabbi|Rear Admiral|Rev|Rev & Mrs|Rev Canon|Rev Dr|Senator|Sgt|Sheriff|Sir|Sir & Lady|Sister|The Earl of|The Hon|The Hon Dr|The Hon Lady|The Hon Lord|The Hon Mrs|The Hon Sir|The Honourable|The Rt Hon|The Rt Hon Dr|The Rt Hon Lord|The Rt Hon Sir|The Rt Hon Visc|Viscount)/, value)[1]", | |
"onError": "set-to-blank", | |
"newColumnName": "suffix", | |
"columnInsertIndex": 3, | |
"description": "Create column suffix at index 3 based on column DupeName using expression grel:value.split(/(Mr|Mrs|Miss|Ms|Dr|Admiral|Air Comm|Ambassador|Baron|Baroness|Brig & Mrs|Brig Gen|Brigadier|Brother|Canon|Capt|Chief|Cllr|Col|Commander|Commander & Mrs|Consul|Consul General|Count|Countess|Countess of|Cpl|Dame|Deputy|Dr & Mrs|Drs|Duchess|Duke|Earl|Father|General|Gräfin|HE|HMA|Her Grace|His Excellency|Ing|Judge|Justice|Lady|Lic|Llc|Lord|Lord & Lady|Lt|Lt Col|Lt Cpl|Madam|Madame|Major|Major General|Marchioness|Marquis|Minister|Mme|Mr & Dr|Mr & Mrs|Mr & Ms|Prince|Princess|Prof|Prof & Dr|Prof & Mrs|Prof & Rev|Prof Dame|Prof Dr|Pvt|Rabbi|Rear Admiral|Rev|Rev & Mrs|Rev Canon|Rev Dr|Senator|Sgt|Sheriff|Sir|Sir & Lady|Sister|The Earl of|The Hon|The Hon Dr|The Hon Lady|The Hon Lord|The Hon Mrs|The Hon Sir|The Honourable|The Rt Hon|The Rt Hon Dr|The Rt Hon Lord|The Rt Hon Sir|The Rt Hon Visc|Viscount)/, value)[1]" | |
}, | |
{ | |
"op": "core/text-transform", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "RegulatedDoneeType", | |
"expression": "value", | |
"columnName": "RegulatedDoneeType", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "MP - Member of Parliament", | |
"l": "MP - Member of Parliament" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "suffix", | |
"expression": "value.trim()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10, | |
"description": "Text transform on cells in column suffix using expression value.trim()" | |
}, | |
{ | |
"op": "core/text-transform", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "RegulatedDoneeType", | |
"expression": "value", | |
"columnName": "RegulatedDoneeType", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "MP - Member of Parliament", | |
"l": "MP - Member of Parliament" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "suffix", | |
"expression": "grel:value.replace(/ MP$/, '')", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10, | |
"description": "Text transform on cells in column suffix using expression grel:value.replace(/ MP$/, '')" | |
}, | |
{ | |
"op": "core/column-addition", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"baseColumnName": "DonorName", | |
"expression": "grel:value.rpartition(/\\s/, true)[1]", | |
"onError": "set-to-blank", | |
"newColumnName": "CompanyNameSuffix", | |
"columnInsertIndex": 7, | |
"description": "Create column CompanyNameSuffix at index 7 based on column DonorName using expression grel:value.rpartition(/\\s/, true)[1]" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"Ltd", | |
"LTD", | |
"Ltd.", | |
"ltd", | |
"Ltd)", | |
"LTD.", | |
"Ltd.,", | |
"Ltd`" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "LTD" | |
}, | |
{ | |
"from": [ | |
"Plc", | |
"PLC", | |
"plc", | |
"PLc", | |
"PlC" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "PLC" | |
}, | |
{ | |
"from": [ | |
"International", | |
"International)" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "International" | |
}, | |
{ | |
"from": [ | |
"Inc", | |
"Inc." | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "INC" | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"Co", | |
"Co.", | |
"C/o", | |
"Co)" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "Co" | |
}, | |
{ | |
"from": [ | |
"Limited", | |
"LIMITED", | |
"limited", | |
"Limited." | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "LIMITED" | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"B.V.", | |
"BV" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "B.V." | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"LIMITED", | |
"LIMITED(THE)", | |
"Linited", | |
"Limted" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "LIMITED" | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"Ltf" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "LTD" | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"Lts" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "LTD" | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"Limit" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "LIMITED" | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/mass-edit", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "value", | |
"edits": [ | |
{ | |
"from": [ | |
"Co.Ltd" | |
], | |
"fromBlank": false, | |
"fromError": false, | |
"to": "LTD" | |
} | |
], | |
"description": "Mass edit cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/blank-down", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
}, | |
{ | |
"type": "list", | |
"name": "CompanyNameSuffix", | |
"expression": "value", | |
"columnName": "CompanyNameSuffix", | |
"invert": true, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "LIMITED", | |
"l": "LIMITED" | |
} | |
}, | |
{ | |
"v": { | |
"v": "Co", | |
"l": "Co" | |
} | |
}, | |
{ | |
"v": { | |
"v": "LLP", | |
"l": "LLP" | |
} | |
}, | |
{ | |
"v": { | |
"v": "LP", | |
"l": "LP" | |
} | |
}, | |
{ | |
"v": { | |
"v": "S.A.", | |
"l": "S.A." | |
} | |
}, | |
{ | |
"v": { | |
"v": "LTD", | |
"l": "LTD" | |
} | |
}, | |
{ | |
"v": { | |
"v": "INC", | |
"l": "INC" | |
} | |
}, | |
{ | |
"v": { | |
"v": "PLC", | |
"l": "PLC" | |
} | |
}, | |
{ | |
"v": { | |
"v": "B.V.", | |
"l": "B.V." | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"description": "Blank down cells in column CompanyNameSuffix" | |
}, | |
{ | |
"op": "core/text-transform", | |
"engineConfig": { | |
"facets": [ | |
{ | |
"type": "list", | |
"name": "DonorStatus", | |
"expression": "value", | |
"columnName": "DonorStatus", | |
"invert": false, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "Company", | |
"l": "Company" | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
}, | |
{ | |
"type": "list", | |
"name": "CompanyNameSuffix", | |
"expression": "value", | |
"columnName": "CompanyNameSuffix", | |
"invert": true, | |
"omitBlank": false, | |
"omitError": false, | |
"selection": [ | |
{ | |
"v": { | |
"v": "LIMITED", | |
"l": "LIMITED" | |
} | |
}, | |
{ | |
"v": { | |
"v": "Co", | |
"l": "Co" | |
} | |
}, | |
{ | |
"v": { | |
"v": "LLP", | |
"l": "LLP" | |
} | |
}, | |
{ | |
"v": { | |
"v": "LP", | |
"l": "LP" | |
} | |
}, | |
{ | |
"v": { | |
"v": "S.A.", | |
"l": "S.A." | |
} | |
}, | |
{ | |
"v": { | |
"v": "LTD", | |
"l": "LTD" | |
} | |
}, | |
{ | |
"v": { | |
"v": "INC", | |
"l": "INC" | |
} | |
}, | |
{ | |
"v": { | |
"v": "PLC", | |
"l": "PLC" | |
} | |
}, | |
{ | |
"v": { | |
"v": "B.V.", | |
"l": "B.V." | |
} | |
} | |
], | |
"selectBlank": false, | |
"selectError": false | |
} | |
], | |
"mode": "row-based" | |
}, | |
"columnName": "CompanyNameSuffix", | |
"expression": "null", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10, | |
"description": "Text transform on cells in column CompanyNameSuffix using expression null" | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment