Helpful links:
Last active
May 30, 2019 15:15
-
-
Save jeremyfelt/7884027 to your computer and use it in GitHub Desktop.
Horrifying Charset Corrections
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Helped to find initial weirdness. By showing all of the obvious `â ` appearances. | |
SELECT post_content FROM wp_posts WHERE post_content RLIKE UNHEX('C3A2'); | |
# EN DASH – | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C293' ), UNHEX('E28093')); | |
# EM DASH — | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C294' ), UNHEX('E28094')); | |
# LEFT DOUBLE QUOTATION MARK “ | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C29C' ), UNHEX('E2809C')); | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2E282ACC593' ), UNHEX('E2809C')); | |
# RIGHT DOUBLE QUOTATION MARK ” | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C29D' ), UNHEX('E2809D')); | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2E282ACC29D' ), UNHEX('E2809D')); | |
# LEFT SINGLE QUOTATION MARK ‘ | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C298' ), UNHEX('E28098')); | |
# RIGHT SINGLE QUOTATION MARK ’ | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C299' ), UNHEX('E28099')); | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2E282ACE284A2'), UNHEX('E28099')); | |
# BULLET • | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C2A2' ), UNHEX('E280A2')); | |
# HORIZONTAL ELLIPSIS … | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C280C2A6' ), UNHEX('E280A6')); | |
# TRADEMARK SIGN ™ | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2C284C2A2' ), UNHEX('E284A2')); | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C3A2E2809EC2A2' ), UNHEX('E284A2')); | |
# Run these almost last as they are slightly generic... | |
# | |
# Shows as a private use character (blank space) in raw output, should be ‘. | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C291' ), UNHEX('E28098')); | |
# Shows a blank space in raw output, should be ’ | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C292' ), UNHEX('E28099')); | |
# Shows as a control character (blank space) in raw output, should be “. Sneaky! | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C293' ), UNHEX('E2809C')); | |
# Shows as a cancel character (blank space) in raw output, should be ”. Also sneaky! | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('C294' ), UNHEX('E2809D')); | |
# Run these last as they likely depend on previous matches... | |
# | |
# This was a combo deal where a curly quote and apostrophe were duplicating efforts as '’ - Fun! | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('27E28099' ), UNHEX('E28099')); | |
# Another combo deal where '‘ was appearing. | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('27E28098' ), UNHEX('E28098')); | |
# This was a combo deal where the ASCII double quote and UTF8 double quote | |
# were side by side as "” | |
UPDATE wp_posts SET post_content=REPLACE(post_content, UNHEX('22E2809D' ), UNHEX('E2809D')); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment