Skip to content

Instantly share code, notes, and snippets.

@gibson042
Last active October 15, 2023 18:20
Show Gist options
  • Save gibson042/5495b65eed0d8e371c155c0dd91262b1 to your computer and use it in GitHub Desktop.
Save gibson042/5495b65eed0d8e371c155c0dd91262b1 to your computer and use it in GitHub Desktop.
Emoji Spy user script
// ==UserScript==
// @name Emoji Spy
// @namespace https://github.com/gibson042
// @description Add tooltips describing emoji sequence and symbol code points.
// @source https://gist.github.com/gibson042/5495b65eed0d8e371c155c0dd91262b1
// @updateURL https://gist.github.com/gibson042/5495b65eed0d8e371c155c0dd91262b1/raw/emoji-spy.user.js
// @downloadURL https://gist.github.com/gibson042/5495b65eed0d8e371c155c0dd91262b1/raw/emoji-spy.user.js
// @version 0.3.0
// @date 2023-10-15
// @author Richard Gibson <@gmail.com>
// @include *
// ==/UserScript==
//
// **COPYRIGHT NOTICE**
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// For the CC0 Public Domain Dedication, see
// <https://creativecommons.org/publicdomain/zero/1.0/>.
//
// **END COPYRIGHT NOTICE**
//
//
// Changelog:
// 0.3.0 (2023-10-15)
// * New: Detect and wrap emoji placeholder <img> elements on Twitter.
// 0.2.0 (2023-09-08)
// * New: CC0 Public Domain Dedication.
(function() {
"use strict";
/* CLASS is the class name carried by every wrapper element this script creates. */
const CLASS = "gibson042-emoji-spy";
/* Bidirectional isolate controls. */
const LRI = "\u2066"; // U+2066 LEFT-TO-RIGHT ISOLATE
const PDI = "\u2069"; // U+2069 POP DIRECTIONAL ISOLATE
/* Short aliases for the String statics used throughout the script. */
const CP = String.fromCodePoint;
const R = String.raw;
/* The pattern sources below are assembled from labelled fragments;
   joining the fragments yields one contiguous regular expression source. */
/* Emoji tag sequence: https://unicode.org/reports/tr51/#def_emoji_tag_sequence */
const P_EMOJI_TAG_SEQUENCE = [
  R`(?:\p{Emoji}\uFE0F?|\p{EBase}\p{EMod})`, // tag base
  R`[\u{E0020}-\u{E007E}]+`,                 // one or more tag characters
  R`\u{E007F}`,                              // CANCEL TAG terminator
].join("");
/* One element of a ZWJ sequence: https://unicode.org/reports/tr51/#EBNF_and_Regex */
const P_ZWJ_ELEMENT = [
  R`(?:\p{Emoji}|[\u2600-\u27BF\u{1F300}-\u{1FAFF}])`, // emoji or symbol-block character
  R`(?:\p{EMod}|\uFE0F\u20E3?)?`,                      // optional modifier, or VS16 with optional keycap
].join("");
/* A ZWJ sequence whose first element is either outside ASCII/Latin-1
   or is a keycap base (#, *, digit) followed by VS16 and/or the enclosing keycap. */
const P_NONTRIVIAL_ZWJ_SEQUENCE = [
  R`(?:(?![\x00-\xFF])${P_ZWJ_ELEMENT}|[#*0-9][\uFE0F\u20E3]{1,2})`,
  R`(?:\u200D${P_ZWJ_ELEMENT})*`,
].join("");
/* RE_POSSIBLE_EMOJI captures (possibly-invalid) emoji and other symbols,
   excluding trivial ASCII/Latin-1 (e.g., digits)
   and including dingbats, emoticons, transport/map/alchemical/misc. symbols, and keycap sequences lacking VS16.
   It has a single capture group and no "g" flag, so String.prototype.split
   interleaves unmatched text (even indices) with matches (odd indices). */
const RE_POSSIBLE_EMOJI = new RegExp(
  "(" + [R`\p{RI}\p{RI}`, P_EMOJI_TAG_SEQUENCE, P_NONTRIVIAL_ZWJ_SEQUENCE].join("|") + ")",
  "u"
);
/* RE_HTML_RTL is a pattern that matches an HTML directionally right-to-left character
that is not preceded by a left-to-right character (but we ignore the <script>/<style> filtering).
Shape: `^[^…]*[…]` — anchored at the start of the input, it consumes any run of characters
that are NOT in the first (negated) class, then requires one character from the second class.
The template below is laid out over many lines and uses a `\U<hex>` shorthand for code points;
before compilation all whitespace is stripped from it and every `\U<hex>` is rewritten to `\u{<hex>}`,
which the "u" flag makes valid. */
/* https://html.spec.whatwg.org/multipage/dom.html#the-dir-attribute */
/* https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%5B%3Abc%3DL%3A%5D%5B%3Abc%3DAL%3A%5D%5B%3Abc%3DR%3A%5D%5D&abb=on&esc=on */
const RE_HTML_RTL = RegExp(R`^
[^
A-Za-z\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u02BB-\u02C1\u02D0\u02D1\u02E0-\u02E4\u02EE\u0370-\u0373\u0376-\u037D
\u037F-\u0383\u0386\u0388-\u03F5\u03F7-\u0482\u048A-\u0589\u058B\u058C\u0903-\u0939\u093B\u093D-\u0940\u0949-\u094C\u094E-\u0950
\u0958-\u0961\u0964-\u0980\u0982-\u09BB\u09BD-\u09C0\u09C5-\u09CC\u09CE-\u09E1\u09E4-\u09F1\u09F4-\u09FA\u09FC\u09FD\u09FF\u0A00
\u0A03-\u0A3B\u0A3D-\u0A40\u0A43-\u0A46\u0A49\u0A4A\u0A4E-\u0A50\u0A52-\u0A6F\u0A72-\u0A74\u0A76-\u0A80\u0A83-\u0ABB\u0ABD-\u0AC0\u0AC6
\u0AC9-\u0ACC\u0ACE-\u0AE1\u0AE4-\u0AF0\u0AF2-\u0AF9\u0B00\u0B02-\u0B3B\u0B3D\u0B3E\u0B40\u0B45-\u0B4C\u0B4E-\u0B54\u0B57-\u0B61
\u0B64-\u0B81\u0B83-\u0BBF\u0BC1-\u0BCC\u0BCE-\u0BF2\u0BFB-\u0BFF\u0C01-\u0C03\u0C05-\u0C3B\u0C3D\u0C41-\u0C45\u0C49\u0C4E-\u0C54
\u0C57-\u0C61\u0C64-\u0C77\u0C7F\u0C80\u0C82-\u0CBB\u0CBD-\u0CCB\u0CCE-\u0CE1\u0CE4-\u0CFF\u0D02-\u0D3A\u0D3D-\u0D40\u0D45-\u0D4C
\u0D4E-\u0D61\u0D64-\u0D80\u0D82-\u0DC9\u0DCB-\u0DD1\u0DD5\u0DD7-\u0E30\u0E32\u0E33\u0E3B-\u0E3E\u0E40-\u0E46\u0E4F-\u0EB0\u0EB2\u0EB3
\u0EBD-\u0EC7\u0ECF-\u0F17\u0F1A-\u0F34\u0F36\u0F38\u0F3E-\u0F70\u0F7F\u0F85\u0F88-\u0F8C\u0F98\u0FBD-\u0FC5\u0FC7-\u102C\u1031\u1038
\u103B\u103C\u103F-\u1057\u105A-\u105D\u1061-\u1070\u1075-\u1081\u1083\u1084\u1087-\u108C\u108E-\u109C\u109E-\u135C\u1360-\u138F
\u139A-\u13FF\u1401-\u167F\u1681-\u169A\u169D-\u1711\u1715-\u1731\u1734-\u1751\u1754-\u1771\u1774-\u17B3\u17B6\u17BE-\u17C5\u17C7\u17C8
\u17D4-\u17DA\u17DC\u17DE-\u17EF\u17FA-\u17FF\u1810-\u1884\u1887-\u18A8\u18AA-\u191F\u1923-\u1926\u1929-\u1931\u1933-\u1938
\u193C-\u193F\u1941-\u1943\u1946-\u19DD\u1A00-\u1A16\u1A19\u1A1A\u1A1C-\u1A55\u1A57\u1A5F\u1A61\u1A63\u1A64\u1A6D-\u1A72\u1A7D\u1A7E
\u1A80-\u1AAF\u1ACF-\u1AFF\u1B04-\u1B33\u1B35\u1B3B\u1B3D-\u1B41\u1B43-\u1B6A\u1B74-\u1B7F\u1B82-\u1BA1\u1BA6\u1BA7\u1BAA\u1BAE-\u1BE5
\u1BE7\u1BEA-\u1BEC\u1BEE\u1BF2-\u1C2B\u1C34\u1C35\u1C38-\u1CCF\u1CD3\u1CE1\u1CE9-\u1CEC\u1CEE-\u1CF3\u1CF5-\u1CF7\u1CFA-\u1DBF
\u1E00-\u1FBC\u1FBE\u1FC2-\u1FCC\u1FD0-\u1FDC\u1FE0-\u1FEC\u1FF0-\u1FFC\u1FFF\u200E\u2071-\u2073\u207F\u208F-\u209F\u20F1-\u20FF\u2102
\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u214F\u2160-\u2188
\u218C-\u218F\u2336-\u237A\u2395\u2427-\u243F\u244B-\u245F\u249C-\u24E9\u26AC\u2800-\u28FF\u2B74\u2B75\u2B96\u2C00-\u2CE4\u2CEB-\u2CEE
\u2CF2-\u2CF8\u2D00-\u2D7E\u2D80-\u2DDF\u2E5E-\u2E7F\u2E9A\u2EF4-\u2EFF\u2FD6-\u2FEF\u2FFC-\u2FFF\u3005-\u3007\u3021-\u3029\u302E\u302F
\u3031-\u3035\u3038-\u303C\u3040-\u3098\u309D-\u309F\u30A1-\u30FA\u30FC-\u31BF\u31E4-\u321C\u321F-\u324F\u3260-\u327B\u327F-\u32B0
\u32C0-\u32CB\u32D0-\u3376\u337B-\u33DD\u33E0-\u33FE\u3400-\u4DBF\u4E00-\uA48F\uA4C7-\uA60C\uA610-\uA66E\uA680-\uA69D\uA6A0-\uA6EF
\uA6F2-\uA6FF\uA722-\uA787\uA789-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA824\uA827\uA82D-\uA837\uA83A-\uA873\uA878-\uA8C3
\uA8C6-\uA8DF\uA8F2-\uA8FE\uA900-\uA925\uA92E-\uA946\uA952-\uA97F\uA983-\uA9B2\uA9B4\uA9B5\uA9BA\uA9BB\uA9BE-\uA9E4\uA9E6-\uAA28\uAA2F
\uAA30\uAA33\uAA34\uAA37-\uAA42\uAA44-\uAA4B\uAA4D-\uAA7B\uAA7D-\uAAAF\uAAB1\uAAB5\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2-\uAAEB\uAAEE-\uAAF5
\uAAF7-\uAB69\uAB6C-\uABE4\uABE6\uABE7\uABE9-\uABEC\uABEE-\uFB1C\uFE1A-\uFE1F\uFE53\uFE67\uFE6C-\uFE6F\uFF00\uFF21-\uFF3A\uFF41-\uFF5A
\uFF66-\uFFDF\uFFE7\uFFEF\U10000-\U10100\U10102-\U1013F\U1018D-\U1018F\U1019D-\U1019F\U101A1-\U101FC\U101FE-\U102DF\U102FC-\U10375
\U1037B-\U107FF\U11000\U11002-\U11037\U11047-\U11051\U11066-\U1106F\U11071\U11072\U11075-\U1107E\U11082-\U110B2\U110B7\U110B8
\U110BB-\U110C1\U110C3-\U110FF\U11103-\U11126\U1112C\U11135-\U11172\U11174-\U1117F\U11182-\U111B5\U111BF-\U111C8\U111CD\U111CE
\U111D0-\U1122E\U11232\U11233\U11235\U11238-\U1123D\U1123F\U11240\U11242-\U112DE\U112E0-\U112E2\U112EB-\U112FF\U11302-\U1133A
\U1133D-\U1133F\U11341-\U11365\U1136D-\U1136F\U11375-\U11437\U11440\U11441\U11445\U11447-\U1145D\U1145F-\U114B2\U114B9\U114BB-\U114BE
\U114C1\U114C4-\U115B1\U115B6-\U115BB\U115BE\U115C1-\U115DB\U115DE-\U11632\U1163B\U1163C\U1163E\U11641-\U1165F\U1166D-\U116AA\U116AC
\U116AE\U116AF\U116B6\U116B8-\U1171C\U11720\U11721\U11726\U1172C-\U1182E\U11838\U1183B-\U1193A\U1193D\U1193F-\U11942\U11944-\U119D3
\U119D8\U119D9\U119DC-\U119DF\U119E1-\U11A00\U11A07\U11A08\U11A0B-\U11A32\U11A39\U11A3A\U11A3F-\U11A46\U11A48-\U11A50\U11A57\U11A58
\U11A5C-\U11A89\U11A97\U11A9A-\U11C2F\U11C37\U11C3E-\U11C91\U11CA8\U11CA9\U11CB1\U11CB4\U11CB7-\U11D30\U11D37-\U11D39\U11D3B\U11D3E
\U11D46\U11D48-\U11D8F\U11D92-\U11D94\U11D96\U11D98-\U11EF2\U11EF5-\U11EFF\U11F02-\U11F35\U11F3B-\U11F3F\U11F41\U11F43-\U11FD4
\U11FF2-\U1343F\U13441-\U13446\U13456-\U16AEF\U16AF5-\U16B2F\U16B37-\U16F4E\U16F50-\U16F8E\U16F93-\U16FE1\U16FE3\U16FE5-\U1BC9C\U1BC9F
\U1BCA4-\U1CEFF\U1CF2E\U1CF2F\U1CF47-\U1D166\U1D16A-\U1D172\U1D183\U1D184\U1D18C-\U1D1A9\U1D1AE-\U1D1E8\U1D1EB-\U1D1FF\U1D246-\U1D2BF
\U1D2D4-\U1D2FF\U1D357-\U1D6DA\U1D6DC-\U1D714\U1D716-\U1D74E\U1D750-\U1D788\U1D78A-\U1D7C2\U1D7C4-\U1D7CD\U1D800-\U1D9FF\U1DA37-\U1DA3A
\U1DA6D-\U1DA74\U1DA76-\U1DA83\U1DA85-\U1DA9A\U1DAA0\U1DAB0-\U1DFFF\U1E007\U1E019\U1E01A\U1E022\U1E025\U1E02B-\U1E08E\U1E090-\U1E12F
\U1E137-\U1E2AD\U1E2AF-\U1E2EB\U1E2F0-\U1E2FE\U1E300-\U1E4EB\U1E4F0-\U1E7FF\U1F02C-\U1F02F\U1F094-\U1F09F\U1F0AF\U1F0B0\U1F0C0\U1F0D0
\U1F0F6-\U1F0FF\U1F110-\U1F12E\U1F130-\U1F169\U1F170-\U1F1AC\U1F1AE-\U1F25F\U1F266-\U1F2FF\U1F6D8-\U1F6DB\U1F6ED-\U1F6EF\U1F6FD-\U1F6FF
\U1F777-\U1F77A\U1F7DA-\U1F7DF\U1F7EC-\U1F7EF\U1F7F1-\U1F7FF\U1F80C-\U1F80F\U1F848-\U1F84F\U1F85A-\U1F85F\U1F888-\U1F88F\U1F8AE\U1F8AF
\U1F8B2-\U1F8FF\U1FA54-\U1FA5F\U1FA6E\U1FA6F\U1FA7D-\U1FA7F\U1FA89-\U1FA8F\U1FABE\U1FAC6-\U1FACD\U1FADC-\U1FADF\U1FAE9-\U1FAEF
\U1FAF9-\U1FAFF\U1FB93\U1FBCB-\U1FBEF\U1FBFA-\U1FFFD\U20000-\U2FFFD\U30000-\U3FFFD\U40000-\U4FFFD\U50000-\U5FFFD\U60000-\U6FFFD
\U70000-\U7FFFD\U80000-\U8FFFD\U90000-\U9FFFD\UA0000-\UAFFFD\UB0000-\UBFFFD\UC0000-\UCFFFD\UD0000-\UDFFFD\UE1000-\UEFFFD\UF0000-\UFFFFD
\U100000-\U10FFFD
]*
[
\u0590\u05BE\u05C0\u05C3\u05C6\u05C8-\u05FF\u0608\u060B\u060D\u061B-\u064A\u066D-\u066F\u0671-\u06D5\u06E5\u06E6\u06EE\u06EF
\u06FA-\u0710\u0712-\u072F\u074B-\u07A5\u07B1-\u07EA\u07F4\u07F5\u07FA-\u07FC\u07FE-\u0815\u081A\u0824\u0828\u082E-\u0858\u085C-\u088F
\u0892-\u0897\u08A0-\u08C9\u200F\uFB1D\uFB1F-\uFB28\uFB2A-\uFD3D\uFD50-\uFDCE\uFDF0-\uFDFC\uFE70-\uFEFE\U10800-\U1091E\U10920-\U10A00
\U10A04\U10A07-\U10A0B\U10A10-\U10A37\U10A3B-\U10A3E\U10A40-\U10AE4\U10AE7-\U10B38\U10B40-\U10D23\U10D28-\U10D2F\U10D3A-\U10E5F
\U10E7F-\U10EAA\U10EAD-\U10EFC\U10F00-\U10F45\U10F51-\U10F81\U10F86-\U10FFF\U1E800-\U1E8CF\U1E8D7-\U1E943\U1E94B-\U1EEEF\U1EEF2-\U1EFFF
]`
.replace(/\s+/g, "").replace(/\\U([0-9a-fA-F]{1,8})/g, (e, h) => `\\u{${h}}`), "u"
);
/* isRTL returns whether or not the directionality of an HTML element is right-to-left.
   @param {Element} el - the element whose directionality is wanted.
   @returns {boolean} true for right-to-left, false otherwise.
   The implementation is chosen once, at load time:
   - Native: `:dir(rtl)` where the engine supports it. Selector support must be probed with
     the `selector(...)` function; a bare selector is not a valid supports condition and
     CSS.supports reports false for it.
   - Fallback: walk up through `bdi` and `[dir]` ancestors. "ltr"/"rtl" decide directly;
     "auto" (and `bdi` without a valid dir, which defaults to auto) is approximated by testing
     the element's text content with RE_HTML_RTL; an invalid dir value contributes nothing,
     so the search continues with the parent. With no deciding ancestor the result is LTR. */
const isRTL = CSS.supports("selector(:dir(rtl))") ?
  el => el.matches(":dir(rtl)") :
  el => {
    const DIRECTION_SOURCES = "bdi, [dir]";
    let candidate = el.closest(DIRECTION_SOURCES);
    while (candidate) {
      /* The reflected `dir` property is limited to known values: "ltr", "rtl", "auto", or "". */
      const dir = candidate.dir;
      if (dir === "ltr") return false;
      if (dir === "rtl") return true;
      if (dir === "auto" || candidate.localName === "bdi") {
        return RE_HTML_RTL.test(candidate.textContent);
      }
      /* Invalid dir value: directionality is inherited, so keep looking further up. */
      const parent = candidate.parentElement;
      candidate = parent ? parent.closest(DIRECTION_SOURCES) : null;
    }
    return false;
  };
/* g_skip holds nodes that have already been processed and need no further work. */
const g_skip = new WeakSet();
/* g_wrappers associates each wrapped node with the wrapper element created for it. */
const g_wrappers = new WeakMap();
/* getWrapperTarget returns the wrapper registered for `el`, or `el` itself when there is none. */
const getWrapperTarget = el => {
  const wrapper = g_wrappers.get(el);
  return wrapper ? wrapper : el;
};
/* g_names maps a code point (as a one-code-point string) to its name.
   It starts with a small built-in table and is extended at runtime once name data has been fetched.
   Entries are inserted in a fixed order: individual symbols, digits, regional indicators, tag characters. */
const g_names = new Map();
/* Individually listed symbols, joiners and selectors. */
for (const [cp, name] of [
  ["#", "NUMBER SIGN"],
  ["*", "ASTERISK"],
  ["\u200D", "ZERO WIDTH JOINER"],
  ["\u20E3", "COMBINING ENCLOSING KEYCAP"],
  ["\u2640", "FEMALE SIGN"],
  ["\u2642", "MALE SIGN"],
  ["\u2690", "WHITE FLAG"],
  ["\u2691", "BLACK FLAG"],
  ["\u26A2", "DOUBLED FEMALE SIGN"],
  ["\u26A3", "DOUBLED MALE SIGN"],
  ["\u26A4", "INTERLOCKED FEMALE AND MALE SIGN"],
  ["\u26A5", "MALE AND FEMALE SIGN"],
  ["\u26A6", "MALE WITH STROKE SIGN"],
  ["\u26A7", "MALE WITH STROKE AND MALE AND FEMALE SIGN"],
  ["\u26A8", "VERTICAL MALE WITH STROKE SIGN"],
  ["\u26A9", "HORIZONTAL MALE WITH STROKE SIGN"],
  ["\u26B2", "NEUTER"],
  ["\uFE0E", "VARIATION SELECTOR-15 [TEXT PRESENTATION]"],
  ["\uFE0F", "VARIATION SELECTOR-16 [EMOJI PRESENTATION]"],
  ["\u{1F3F3}", "WAVING WHITE FLAG"],
  ["\u{1F3F4}", "WAVING BLACK FLAG"],
]) {
  g_names.set(cp, name);
}
/* ASCII digits 0-9. */
for (let digit = 0; digit < 10; digit++) {
  g_names.set(String(digit), "DIGIT " + digit);
}
/* Regional indicator symbols U+1F1E6..U+1F1FF, labelled with the letters A-Z. */
for (let offset = 0; offset < 26; offset++) {
  g_names.set(CP(0x1F1E6 + offset), "REGIONAL INDICATOR SYMBOL LETTER " + CP(0x41 + offset));
}
/* Named tag characters. */
g_names.set("\u{E0001}", "LANGUAGE TAG");
g_names.set("\u{E0020}", "TAG SPACE");
g_names.set("\u{E007F}", "CANCEL TAG");
/* Remaining tag characters U+E0021..U+E007E, labelled with the ASCII character each one mirrors. */
for (let tag = 0xE0021; tag < 0xE007F; tag++) {
  g_names.set(CP(tag), "TAG " + CP(tag - 0xE0000));
}
/* g_nameSources is a list of [URL, processor] pairs representing code point name data by descending preference.
   Each processor accepts the response text from its URL and returns the corresponding
   sequence of [code point, name] pairs (or throws if it cannot).
   g_nameSources is replaced with null after any processor successfully handles data from its URL. */
let g_nameSources = [
  [
    "https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3AEmoji%3A%5D",
    /* HTML listing: the response is parsed into a detached <html> element, but extraction
       is not implemented yet, so this processor always throws. */
    function (text) {
      const fragment = document.createDocumentFragment();
      const root = fragment.appendChild(make("html"));
      root.innerHTML = text;
      throw new Error("TODO: implement processing");
    },
  ],
  [
    "https://raw.githubusercontent.com/unicode-org/unicodetools/main/unicodetools/src/main/resources/org/unicode/tools/emoji/en-tts.tsv",
    /* Tab-separated lines of "<code point>\t<name>". Lines that do not start with a single
       hexadecimal code point (optionally prefixed "U+") followed by a tab are discarded,
       which drops multi-code-point records like:
         U+002A U+20E3	keycap asterisk
    */
    function (text) {
      const pairs = [];
      for (const line of text.split("\n")) {
        const found = /^(?:U[+])?([0-9A-F]+)\t(.*)/i.exec(line);
        if (found) pairs.push([CP("0x" + found[1]), found[2]]);
      }
      return pairs;
    },
  ],
  [
    "https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt",
    /* Semicolon-separated lines whose first two fields are the code point and its name, like:
         0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;
    */
    function (text) {
      const pairs = [];
      for (const line of text.trimEnd().split("\n")) {
        const fields = line.split(";");
        pairs.push([CP("0x" + fields[0]), fields[1]]);
      }
      return pairs;
    },
  ],
];
/* Wrap each emoji sequence with a span describing its code point(s).
   On every mouseover the (wrapper-resolved) target is handled in one of three ways:
   1. It is already a wrapper: while name data is still unloaded, arm a 3-second timer that loads it
      (cancelled when the pointer leaves); once names have loaded, regenerate the tooltip one time.
   2. It is an emoji placeholder (see getEmojiFromPlaceholder): wrap the element itself.
   3. Otherwise: split each unprocessed child text node around emoji sequences and wrap each sequence. */
document.addEventListener("mouseover", evt=>{
  const target = getWrapperTarget(evt.target);
  if (target.nodeType !== 1) return;
  if (isWrapper(target)) {
    if (g_nameSources) {
      /* Load name data on long hover (3 seconds). */
      const tLoad = setTimeout(async ()=>{
        /* Try sources in order of preference; the first that fetches and parses wins. */
        for (const [url, fn] of g_nameSources || []) {
          try {
            const response = await fetch(url);
            if (response.status !== 200) throw new Error("bad status: HTTP " + response.status);
            const text = await response.text();
            for (const [cp, name] of fn(text)) g_names.set(cp, name);
            g_nameSources = null;
            break;
          } catch (ex) {
            /* Best effort: report the failure and fall through to the next source. */
            console.warn(ex, url);
          }
        }
      }, 3000);
      target.addEventListener("mouseleave", ()=>clearTimeout(tLoad), {once: true, passive: true});
    } else if (!g_skip.has(target)) {
      /* Replace default tooltips after names have loaded. */
      const titleWrapper = target.closest("[title]");
      /* A placeholder wrapper contains an element rather than text, so its textContent is empty;
         prefer the emoji recorded in data-text and skip the refresh if no text is available. */
      const text = target.getAttribute("data-text") || target.textContent;
      if (titleWrapper && text && (titleWrapper === target || isWrapper(titleWrapper))) {
        titleWrapper.title = makeWrapper(text, target).title;
      }
      g_skip.add(target);
    }
    return;
  }
  if (g_skip.has(target)) return;
  /* Wrap emoji placeholders. */
  const placeholderText = getEmojiFromPlaceholder(target);
  if (placeholderText) {
    const wrapper = makeWrapper(placeholderText, target);
    target.parentNode.insertBefore(wrapper, target);
    /* The wrapper is reused as a container for the placeholder, so drop its generated content. */
    wrapper.textContent = "";
    /* Emptying the wrapper may discard the inner element that carried data-text; record it here. */
    wrapper.setAttribute("data-text", placeholderText);
    wrapper.appendChild(target);
    /* Mark the placeholder and all of its descendants as processed and map them to the wrapper. */
    (function skipTree(node) {
      g_skip.add(node);
      g_wrappers.set(node, wrapper);
      for (const child of node.childNodes) skipTree(child);
    })(target);
    return;
  }
  /* Wrap emoji sequences embedded in child text nodes.
     Iterate over a snapshot because the loop inserts and removes children of `target`. */
  for (const node of [...target.childNodes]) {
    if (node.nodeType !== 3 || g_skip.has(node)) continue;
    const nodeText = node.nodeValue;
    if (!RE_POSSIBLE_EMOJI.test(nodeText)) {
      g_skip.add(node);
      continue;
    }
    /* split() with one capture group alternates plain text (even index) and emoji (odd index). */
    nodeText.split(RE_POSSIBLE_EMOJI).forEach((s,i)=>{
      if (!s) return;
      if (i % 2) {
        target.insertBefore(makeWrapper(s, target), node);
        return;
      }
      const plain = document.createTextNode(s);
      /* Text between matches contains no further emoji, so it never needs rescanning. */
      g_skip.add(plain);
      target.insertBefore(plain, node);
    });
    target.removeChild(node);
  }
}, {passive: true});
/* Open Emojipedia on wrapper triple-click.
   The emoji text is taken from the wrapper's data-text attribute, falling back to the wrapper's
   text content and then to the clicked placeholder element itself, because a wrapper is not
   guaranteed to carry data-text. Nothing is opened when no emoji text can be determined. */
document.addEventListener("click", evt=>{
  if (evt.detail !== 3) return;
  const target = getWrapperTarget(evt.target);
  if (target.nodeType !== 1 || !isWrapper(target)) return;
  const text = target.getAttribute("data-text") ||
    target.textContent ||
    (evt.target.nodeType === 1 && getEmojiFromPlaceholder(evt.target));
  if (!text) return;
  window.open(`https://emojipedia.org/emoji/${encodeURIComponent(text)}/`);
});
/* getEmojiFromPlaceholder returns the emoji text represented by a placeholder element,
   or undefined when `el` is not a recognized placeholder.
   Recognized placeholders:
   - Twitter: <img alt="<emoji>" src="https://abs-0.twimg.com/emoji/v2/svg/<hexadecimal code point>.svg"> */
function getEmojiFromPlaceholder(el) {
  const isTwitterEmojiImage = el.matches("img[alt][src*='/emoji/']");
  return isTwitterEmojiImage ? el.alt : undefined;
}
/* makeWrapper builds (but does not insert) a wrapper element for an emoji sequence.
   @param {string} text - the emoji sequence; it becomes the wrapper's text content and data-text attribute.
   @param {Element} elContext - element whose directionality decides the wrapper's structure.
   @returns {Element} a span of class CLASS whose title has one "U+XXXX NAME" line per code point
     (NAME is empty when the code point is not in g_names), followed by a usage hint.
   In a right-to-left context the text span is marked dir="rtl" and nested in an outer dir="ltr"
   span that carries the title; that outer span is what gets returned.
   Both spans carry data-text, so the emoji can be read back from whichever one an event lands on. */
function makeWrapper(text, elContext) {
  const describe = ch => {
    const hex = ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0");
    return `U+${hex} ${g_names.get(ch) || ""}`;
  };
  /* Spreading a string iterates by code point, so surrogate pairs stay intact. */
  const title = [...text].map(describe).join("\n") +
    "\n\nTriple-click for Emojipedia.";
  const el = make("span", {
    className: CLASS,
    textContent: text,
  });
  el.setAttribute("data-text", text);
  /* The generated text node must not be scanned and wrapped again.
     An empty `text` produces no text node, and a WeakSet rejects null. */
  const textNode = el.firstChild;
  if (textNode) g_skip.add(textNode);
  if (!isRTL(elContext)) {
    el.title = title;
    return el;
  }
  el.dir = "rtl";
  const elDirWrapper = make("span", {
    className: CLASS,
    dir: "ltr",
    title,
  });
  elDirWrapper.setAttribute("data-text", text);
  elDirWrapper.appendChild(el);
  return elDirWrapper;
}
/* isWrapper reports whether `el` is a wrapper element created by this script (i.e., has class CLASS).
   @param {?Node} el - node to test; null, undefined and nodes without a classList
     (e.g., text nodes or the document) are tolerated and yield false.
   @returns {boolean} */
function isWrapper(el) {
  return Boolean(el && el.classList && el.classList.contains(CLASS));
}
/* helpers */
/* make creates a `name` element and copies the own enumerable properties of `props` onto it.
   @param {string} name - tag name passed to document.createElement.
   @param {Object} [props={}] - DOM properties to assign (e.g., className, textContent, title).
   @returns {Element} the new element. */
function make(name, props = {}) {
  const el = document.createElement(name);
  Object.assign(el, props);
  return el;
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment