Last active
May 28, 2021 12:24
-
-
Save baldwicc/8756aacd1cfb2ecc8659af9128f539c6 to your computer and use it in GitHub Desktop.
Replaces sensitive identifiers with a freshly generated random GUID
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let | |
// Replaces a column's contents with a psuedononymous identifier that is designed not to be reversible. | |
// | |
// Note that this does not guarantee reversibility is impossible; the only assertions are that: | |
// - they are unique and randomly generated upon each refresh | |
// - they are designed not to be reversible | |
// - they are *not* cryptographically secure (i.e. entropy may be derived from a simple source, such as the current time) | |
fn = (DataTable as table, SensitiveIdentifier as text) => | |
let | |
// make a single-column table containing the distinct sensitive ids | |
DistinctValues = Table.Distinct(Table.SelectColumns(DataTable, { SensitiveIdentifier })), | |
// make a psuedononymous GUID for each one (since at this point, we still have reversibility) | |
AssociateNewGUIDs = Table.AddColumn(DistinctValues, "Psuedononymous GUID", each Text.NewGuid(), type text), | |
// we want to replace the sensitive identifier with an integer id (for performance), so: | |
// - sort alphabetically by the new psuedononymous GUIDs | |
// - assign an incremental index | |
// - drop the psuedononymous GUIDs | |
IndexNewGUIDs = Table.RemoveColumns( | |
Table.AddIndexColumn( | |
Table.Sort( | |
AssociateNewGUIDs, | |
{{"Psuedononymous GUID", Order.Ascending}} | |
), | |
"Psuedononymous ID", | |
10000, | |
5 | |
), | |
"Psuedononymous GUID" | |
), | |
// b-b-b-b-buffer; | |
// not strictly nessessary for dataset refresh, but prevents Power Query's preview from chucking a fit | |
Buffer = Table.Buffer(IndexNewGUIDs), | |
// rejoin with source | |
JoinWithSourceData = Table.Join(DataTable, SensitiveIdentifier, Buffer, SensitiveIdentifier), | |
// remove original column | |
RemoveOriginalColumn = Table.RemoveColumns(JoinWithSourceData, { SensitiveIdentifier }), | |
// rename new column | |
RenameNewColumn = Table.RenameColumns(RemoveOriginalColumn, {{ "Psuedononymous ID", "Psuedo " & SensitiveIdentifier }}), | |
// done! | |
Result = RenameNewColumn | |
in | |
Result | |
in | |
fn |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment