// ==UserScript==
// @name         NovelAI Diffusion Tokenizer Overlay
// @namespace    khanon
// @match        https://novelai.net/image
// @grant        none
// @version      1.1
// @author       khanon
// @description  Shows a marker at the end of each 75-token boundary so you can avoid having your tags split across token chunks.
// @homepageURL  https://gist.github.com/khanonners/4fa31c9fea1a38bfc159b42797df04f9
// @downloadURL  https://gist.github.com/khanonners/4fa31c9fea1a38bfc159b42797df04f9/raw/naid-tokenizer-overlay.user.js
// @run-at       document-start
// ==/UserScript==

/** Settings */
const TOKENIZER_OVERLAY_ENABLED = true;
const MARKER_COLOR = "white";
/** End Settings */
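
// Enum index passed to the tokenizer's encode()/decode() to select the CLIP
// text encoder (see the bundle notes below).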
const CLIP_MODEL_INDEX = 5;

/**
minified bundle excerpt:

  ci = n(16022),
  di = n(16289),
  ui = n(86837),
  hi = n(32158),
  pi = n(18265),
  mi = n(43182),
  gi = n(48194),
  fi = [
    ['dutch angle', 5],
    ['from above', 5],
**/
// as of 2023-11-16:
// `n` is the nextjs require function (window.__next_require__)
// 32158 is the tokenizer module's id
// `hi` is the minified tokenizer module.
// `hi.PT` is the tokenizer's constructor (its prototype has encode/decode).

// The script will try to find the tokenizer module id and constructor name
// by looking through all nextjs modules until it finds one that exports a
// function whose prototype has both an encode and a decode method.

// Call the tokenizer like this:
//   var tokenizer = new hi.PT();
//   tokenizer.encode('prompt', 5);
// Where 5 is the enum index for the CLIP model's encoder.
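
// A quick way to sanity-check this from the devtools console (a sketch; the
// module id 32158 and the ctor name `PT` change between builds, and whether
// encode() returns a promise is assumed from the awaits used below):
//
//   const mod = window.__next_require__(32158);
//   const Ctor = Object.values(mod).find(
//     (f) => typeof f === "function" && f.prototype?.encode && f.prototype?.decode
//   );
//   const t = new Ctor();
//   Promise.resolve(t.encode('1girl, solo', 5)).then(console.log);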

(function () {
  let Tokenizer;
  let pipeTokenId;
  let styles = [];
  let layoutDebugEnabled = false;
  // let skipParamsSearch = false;

  function createOverlayDiv(textarea) {
    const overlay = document.createElement("div");
    overlay.style.position = "absolute";
    overlay.style.top = "0";
    overlay.style.left = "0";
    overlay.style.right = "0";
    overlay.style.bottom = "0";
    overlay.style.pointerEvents = "none";
    overlay.style.whiteSpace = "pre-wrap";
    overlay.style.wordWrap = "break-word";
    overlay.style.overflow = "hidden";
    overlay.className = "tokenizer-overlay";
    textarea.parentNode.appendChild(overlay);

    setTimeout(applyTextareaStylesToOverlay, 100, textarea, overlay);

    injectStyles();

    return overlay;
  }
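
  // Mirrors the textarea's computed typography and box metrics onto the
  // overlay so the overlay's text wraps exactly like the textarea's contents.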
  function applyTextareaStylesToOverlay(textarea, overlay) {
    // `computed` rather than `styles` to avoid shadowing the outer array
    const computed = window.getComputedStyle(textarea);
    overlay.style.font = computed.font;
    ["Top", "Left", "Right", "Bottom"].forEach((side) => {
      overlay.style[`border${side}Width`] = computed[`border${side}Width`];
      overlay.style[`border${side}Style`] = computed[`border${side}Style`];
      overlay.style[`border${side}Color`] = "transparent";
    });
    overlay.style.fontSize = computed.fontSize;
    overlay.style.lineHeight = computed.lineHeight;
    overlay.style.letterSpacing = computed.letterSpacing;
    overlay.style.wordSpacing = computed.wordSpacing;
    overlay.style.textTransform = computed.textTransform;
    overlay.style.textAlign = computed.textAlign;
    overlay.style.direction = computed.direction;
    overlay.style.padding = computed.padding;
    overlay.style.width = computed.width;
    overlay.style.height = computed.height;
  }

  function injectStyles() {
    if (styles.length) styles.forEach((style) => style.remove());
    styles = [];

    addGlobalStyle(`
      .tokenizer-overlay {
        color: transparent;
      }

      .tokenizer-overlay-marker {
        position: absolute;
        width: 3px;
        height: 4px;
        background-color: ${MARKER_COLOR};
      }
    `);

    if (layoutDebugEnabled) {
      addGlobalStyle(`
        .tokenizer-overlay {
          outline: 2px solid #ff000088;
          color: #ff000088;
        }
      `);
    }
  }

  function addGlobalStyle(css) {
    const head = document.head || document.getElementsByTagName("head")[0];
    const style = document.createElement("style");
    head.appendChild(style);
    style.appendChild(document.createTextNode(css));
    styles.push(style);
  }

  async function updateOverlay(overlay, textarea) {
    if (!Tokenizer) return;

    // Disabled the React props inspection because it's too slow and, for some
    // reason, doesn't seem to pick up updated props after the first render,
    // even though I can see them in devtools.

    // const parent = textarea.parentNode?.parentNode?.parentNode;
    // const paramsProvider = findPromptParamsProvider(parent);
    // if (!paramsProvider) skipParamsSearch = true;
    // const params = paramsProvider?.props?.params || {};
    // const { uc, ucPreset, qualityToggle } = params;

    let prefixTokens = 0;
    const input = textarea.value;

    // console.log(
    //   "naid-tokenizer-overlay: Updating overlay",
    //   uc,
    //   ucPreset,
    //   qualityToggle,
    //   input
    // );
    // if (uc === input) {
    //   // This overlay is for the UC prompt. Add prefix tokens for whatever UC
    //   // preset is selected.
    //   if (ucPreset === 0) {
    //     // High
    //     prefixTokens = 48;
    //   } else if (ucPreset === 1) {
    //     // Light
    //     prefixTokens = 18;
    //   } else if (ucPreset === 2) {
    //     // None
    //     prefixTokens = 2;
    //   }
    // }

    overlay.innerHTML = "";
    const tokenChunks = await splitIntoTokenChunks(input, prefixTokens);

    tokenChunks.forEach((chunk) => {
      const { text, isPipe } = chunk;
      // const span = document.createElement("span");
      // span.className = "tokenizer-overlay-chunk" + (isPipe ? " pipe" : "");
      // span.textContent = text;
      // overlay.appendChild(span);

      // insert the chunk's text, then an empty marker span right after it
      overlay.appendChild(document.createTextNode(text));
      overlay.appendChild(document.createElement("span"));
      overlay.lastChild.className = "tokenizer-overlay-marker";
    });
  }

  async function splitIntoTokenChunks(text, prefixTokens = 0) {
    const textLower = text.toLowerCase();
    const tokenizer = new Tokenizer();
    const chunks = [];
    let chunk = "";
    let cursor = 0;

    const tokenized = await tokenizer.encode(text, CLIP_MODEL_INDEX);

    // Pipe characters are markup that forces a new chunk, but unlike other
    // markup characters the tokenizer returns them as tokens. They aren't
    // sent to the model, so we need to filter them out.
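    // e.g. (hypothetical ids) encode("tag1|tag2", 5) might come back as
    // [id("tag1"), PIPE_ID, id("tag2")]; PIPE_ID is dropped before counting.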
    if (!pipeTokenId) {
      const pipeTokens = await tokenizer.encode("|", CLIP_MODEL_INDEX);
      pipeTokenId = pipeTokens[0];
    }
    const tokens = tokenized.filter((t) => t !== pipeTokenId);

    let chunkTokenCount = prefixTokens;
    for (let i = 0; i < tokens.length; i++) {
      const decodedToken = (
        await tokenizer.decode([tokens[i]], CLIP_MODEL_INDEX)
      ).trim();

      // advance the cursor past whitespace and closing markup characters,
      // as these should be attributed to the previous token
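      // e.g. with the input "{tag1} tag2", after decoding "tag1" the cursor
      // sits on "}"; the brace and the following space stay with "tag1"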
      while (cursor < text.length) {
        const char = text[cursor];
        if (char === " " || char === "}" || char === "]") {
          cursor++;
          // if the current chunk is empty and a previous chunk exists,
          // append the character to that chunk instead
          if (!chunk && chunks.length) {
            chunks[chunks.length - 1].text += char;
          } else {
            chunk += char;
          }
        } else {
          break;
        }
      }

      // find the end of the current token in the original input text; because
      // markup characters in the input do not come back in the decoded token,
      // we may need to advance the cursor past them
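      // e.g. for the input "{{tag1}}", the decoded token is just "tag1";
      // substring(cursor, nextCursor) re-attaches the leading "{{" to it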
      const nextCursor =
        textLower.indexOf(decodedToken, cursor) + decodedToken.length;
      const tokenWithMarkup = text.substring(cursor, nextCursor);

      // force a new chunk on pipe characters
      if (tokenWithMarkup.includes("|")) {
        const pipeIndex = tokenWithMarkup.indexOf("|");
        const beforePipe = tokenWithMarkup.substring(0, pipeIndex);
        const afterPipe = tokenWithMarkup.substring(pipeIndex);

        // flush the current chunk with the text before the pipe
        chunk += beforePipe;
        if (chunk) cursor = nextCursor - afterPipe.length;
        chunks.push({ text: chunk, position: cursor, isPipe: true });

        // start a new chunk from the text after the pipe
        chunk = afterPipe;
        chunkTokenCount = afterPipe ? 1 : 0;
        cursor = nextCursor;
      } else {
        // otherwise add the token to the current chunk and advance the cursor
        chunk += tokenWithMarkup;
        cursor = nextCursor;
        chunkTokenCount++;
      }
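
      // 75 tokens per chunk: CLIP's text context is 77 positions, two of
      // which go to the BOS/EOS specials (standard CLIP behavior; assumed
      // to be why NovelAI chunks prompts at 75)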
      // flush the current chunk if it's full
      if (chunkTokenCount >= 75) {
        chunks.push({ text: chunk, position: cursor });
        chunk = "";
        chunkTokenCount = 0;
      }
    }

    // flush the last chunk to ensure we show a marker at the end of the prompt
    const leftover = text.substring(cursor);
    chunks.push({ text: chunk + leftover, position: cursor });
    return chunks;
  }

  function findTokenizerModule() {
    console.log("naid-tokenizer-overlay: Searching for minified tokenizer");
    for (let i = 0; i < 100000; i++) {
      try {
        const module = window.__next_require__(i);
        for (const key in module) {
          const property = module[key];
          if (typeof property !== "function") continue;
          if (property.prototype?.encode && property.prototype?.decode) {
            console.log("naid-tokenizer-overlay: Found tokenizer module", i);
            console.log("naid-tokenizer-overlay: Found tokenizer ctor", key);
            localStorage.setItem("nto-tokenizer-module-id", i);
            localStorage.setItem("nto-tokenizer-constructor-name", key);
            return { moduleId: i, ctorName: key };
          }
        }
      } catch (e) {}
    }
    return null;
  }

  function loadTokenizer(moduleId, ctorName) {
    const module = window.__next_require__(moduleId);
    if (!module) throw new Error("Module not found");
    const ctor = module[ctorName];
    if (!ctor.prototype?.encode || !ctor.prototype?.decode) {
      throw new Error("Saved function doesn't look like a tokenizer");
    }
    Tokenizer = ctor;
  }

  function observePage() {
    const pageObserver = new MutationObserver((mutations) => {
      mutations.forEach(({ type, addedNodes, removedNodes }) => {
        if (type !== "childList") return;

        addedNodes.forEach((node) => {
          if (node.nodeType !== 1) return;
          // this might be too slow, but we don't get mutations when an entire
          // subtree is added at once, so we need to check every node under it
          const textareas = node.querySelectorAll("textarea");
          textareas.forEach((textarea) => hookTextarea(textarea));
        });

        removedNodes.forEach((node) => {
          if (node.nodeType !== 1) return;
          const textareas = node.querySelectorAll("textarea");
          textareas.forEach((textarea) => {
            if (typeof textarea.__disconnectNtoObserver === "function") {
              textarea.__disconnectNtoObserver();
            }
          });
        });
      });
    });
    pageObserver.observe(document.body, { childList: true, subtree: true });
    console.log("naid-tokenizer-overlay: Set up page observer");
  }

  function hookTextarea(textarea) {
    const overlay = createOverlayDiv(textarea);
    updateOverlay(overlay, textarea);
    // addEventListener returns undefined, so keep a reference to the handler
    // itself for removeEventListener later
    const scrollListener = () => {
      overlay.scrollTop = textarea.scrollTop;
    };
    textarea.addEventListener("scroll", scrollListener);

    let lastValue = "";
    const observer = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        const { type, attributeName } = mutation;
        if (type === "attributes" && attributeName === "style") {
          applyTextareaStylesToOverlay(textarea, overlay);
        } else if (type === "childList" || type === "characterData") {
          // note: "subtree" is an observer option, not a mutation type
          if (textarea.value === lastValue) return;
          updateOverlay(overlay, textarea);
          lastValue = textarea.value;
          overlay.scrollTop = textarea.scrollTop;
        }
      });
    });
    observer.observe(textarea, {
      attributes: true,
      childList: true,
      characterData: true,
      subtree: true,
    });

    textarea.__disconnectNtoObserver = () => {
      textarea.removeEventListener("scroll", scrollListener);
      observer.disconnect();
      overlay.remove();
      console.log("naid-tokenizer-overlay: Unhooked textarea", textarea);
    };

    console.log("naid-tokenizer-overlay: Hooked new textarea", textarea);
    return observer;
  }

  // function getReactInternalProps(node) {
  //   for (const key in node) {
  //     if (key.startsWith("__reactProps$")) {
  //       return node[key];
  //     }
  //   }
  //   return null;
  // }

  // const MAX_SEARCH_DEPTH = 4;
  // function findPromptParamsProvider(node, depth = 0) {
  //   if (skipParamsSearch) return null;
  //   if (!node) {
  //     console.warn("naid-tokenizer-overlay: Reached null node");
  //     return null;
  //   }
  //   if (depth > MAX_SEARCH_DEPTH) {
  //     console.warn("naid-tokenizer-overlay: Reached max search depth");
  //     return null;
  //   }

  //   const props = getReactInternalProps(node);
  //   if (!props) {
  //     console.warn("naid-tokenizer-overlay: No props found on node", node);
  //     return null;
  //   }

  //   if (props.children) {
  //     const children = Array.isArray(props.children)
  //       ? props.children
  //       : [props.children];

  //     for (const child of children) {
  //       if (child?.props?.params?.qualityToggle !== undefined) {
  //         console.log("naid-tokenizer-overlay: Found params provider", child);
  //         return child;
  //       }

  //       const result = findPromptParamsProvider(child, depth + 1);
  //       if (result) return result;
  //     }
  //   }
  //   console.warn("naid-tokenizer-overlay: Reached end of branch", node);
  //   return null;
  // }

  async function main() {
    if (!TOKENIZER_OVERLAY_ENABLED) return;

    let moduleFound = false;
    let start = Date.now();
    let savedModuleId = localStorage.getItem("nto-tokenizer-module-id");
    let savedCtorName = localStorage.getItem("nto-tokenizer-constructor-name");

    observePage();

    while (!moduleFound) {
      try {
        loadTokenizer(savedModuleId, savedCtorName);
        moduleFound = true;
        console.log("naid-tokenizer-overlay: Got tokenizer");
      } catch (e) {
        if (Date.now() - start > 10000) {
          console.error(
            "naid-tokenizer-overlay: Failed to find tokenizer, script won't work"
          );
          return;
        }
        const result = findTokenizerModule();
        if (result) {
          savedModuleId = result.moduleId;
          savedCtorName = result.ctorName;
        }
      }
      await new Promise((resolve) => setTimeout(resolve, 250));
    }
  }

  console.log("naid-tokenizer-overlay: Waiting for page to load");
  window.addEventListener("load", main, { once: true });

  window.addEventListener("keyup", (e) => {
    if (e.shiftKey && e.altKey && e.key === "D") {
      layoutDebugEnabled = !layoutDebugEnabled;
      console.log("naid-tokenizer-overlay: Debug layout", layoutDebugEnabled);
      injectStyles();
    }
  });
})();