Created
November 18, 2022 13:30
-
-
Save sorrycc/9d48bbc767e212baeee3c24e3a8f793f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Heuristically locates the element that holds the main article text.
 *
 * The paragraph (or, when there are no paragraphs, the div) with the most
 * words is taken as a seed. The seed is then widened to its ancestors until
 * the selection holds at least 40% of the words under `root`, and finally
 * lifted out of any p/blockquote/video/figure wrapper.
 *
 * @param {Element} [root] - Subtree to search; falls back to `document.body`
 *   when falsy.
 * @returns {Element|null} The chosen container, or `null` when `root` has no
 *   words or no candidate element contains any words.
 */
function getContainer(root) {
  const scope = root || document.body;
  // `match` returns null when there is no match, so normalise to a count.
  const countWords = (text) => (text ? (text.match(/\S+/g) ?? []).length : 0);

  const totalWords = countWords(scope.innerText);
  if (totalWords === 0) return null;

  let candidates = scope.querySelectorAll('p');
  if (!candidates.length) candidates = scope.querySelectorAll('div');
  if (!candidates.length) return null;

  let container = null;
  let maxWords = 0;
  for (const candidate of candidates) {
    const numWords = countWords(candidate.innerText);
    if (numWords > maxWords) {
      maxWords = numWords;
      container = candidate;
    }
  }
  // Every candidate was empty or whitespace-only: nothing to anchor on.
  if (!container) return null;

  // Widen the selection until it covers at least 40% of the words in scope.
  let selectedWords = maxWords;
  while (
    selectedWords / totalWords < 0.4 &&
    container !== scope &&
    container.parentElement &&
    container.parentElement.innerText
  ) {
    container = container.parentElement;
    selectedWords = countWords(container.innerText);
  }

  // These tags are content leaves rather than containers; step out of them,
  // but stop at the top of the tree instead of dereferencing a null parent.
  const leafTags = ['p', 'blockquote', 'video', 'figure'];
  while (
    container.parentElement &&
    leafTags.includes(container.tagName.toLowerCase())
  ) {
    container = container.parentElement;
  }
  return container;
}
/**
 * Collects the heading, list-item and paragraph elements inside `container`.
 *
 * A single selector-list query is used so the elements come back in document
 * order rather than grouped by tag name.
 *
 * NOTE(review): nested matches (e.g. a `p` inside an `li`) are both returned,
 * so their text appears twice in the result — confirm whether callers want that.
 *
 * @param {Element} container - Element whose descendants are searched.
 * @returns {Element[]} Matching elements as a plain array.
 */
function getNodes(container) {
  const blockTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'p'];
  return Array.from(container.querySelectorAll(blockTags.join(',')));
}
/**
 * Deep-clones each node and inserts the clone immediately after its original.
 *
 * @param {Iterable<Node>} nodes - Nodes to duplicate; each must have a parent.
 * @returns {Node[]} The inserted clones, in the same order as `nodes`.
 */
function copyNodes(nodes) {
  const copiedNodes = [];
  for (const node of nodes) {
    const copiedNode = node.cloneNode(true);
    // insertBefore with a null reference appends, so a last child needs no
    // special case.
    node.parentNode.insertBefore(copiedNode, node.nextSibling);
    copiedNodes.push(copiedNode);
  }
  return copiedNodes;
}
// Endpoint of the translation service the script posts to.
const API_URL = 'http://localhost:8080/translate';

/**
 * Sends `text` to the translation endpoint and returns the translated text.
 *
 * The request is a JSON POST with `source_lang: 'auto'` and
 * `target_lang: 'ZH'`; the response body is expected to be JSON with a
 * `data` property.
 *
 * @param {string} text - Text to translate.
 * @returns {Promise<*>} The `data` property of the JSON response.
 * @throws {Error} When the HTTP status is not 200 or `data` is falsy.
 */
async function translate(text) {
  const res = await fetch(API_URL, {
    method: 'POST',
    mode: 'cors',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      text,
      source_lang: 'auto',
      target_lang: 'ZH',
    }),
  });
  if (res.status !== 200) {
    throw new Error(`Translate failed, ${res.status} ${res.statusText}`);
  }
  const { data } = await res.json();
  if (!data) {
    throw new Error(`Translate failed, data is null`);
  }
  return data;
}
/**
 * Groups the blank-line-separated chunks of `text` into blocks whose joined
 * length does not exceed `maxBlockLength`.
 *
 * Chunks are never split, and the separators re-inserted when a block is
 * joined count towards the limit, so each returned string fits the budget.
 *
 * @param {string} text - Text whose chunks are separated by '\n\n'.
 * @param {number} [maxBlockLength=4000] - Maximum length of a returned block.
 * @returns {string[]} Blocks in original order; joining them with '\n\n'
 *   reproduces `text`.
 * @throws {Error} When a single chunk is `maxBlockLength` characters or longer.
 */
function textToBlocks(text, maxBlockLength = 4000) {
  const SEP = '\n\n';
  const result = [];
  let block = [];
  let blockLength = 0;
  for (const chunk of text.split(SEP)) {
    const chunkSize = chunk.length;
    if (chunkSize >= maxBlockLength) throw new Error(`chunk is too large`);
    // Length of the block once this chunk (and its separator) is appended.
    const projected = block.length
      ? blockLength + SEP.length + chunkSize
      : chunkSize;
    if (projected > maxBlockLength) {
      if (block.length) result.push(block.join(SEP));
      block = [chunk];
      blockLength = chunkSize;
    } else {
      block.push(chunk);
      blockLength = projected;
    }
  }
  if (block.length) result.push(block.join(SEP));
  return result;
}
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Time to wait, passed to `setTimeout`.
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
/**
 * Entry point: finds the article container, translates its headings, list
 * items and paragraphs block by block, and inserts each translation as a
 * highlighted copy directly after the original element.
 */
(async () => {
  const container = getContainer();
  console.log('> container', container);
  if (!container) {
    throw new Error('No container found');
  }

  const nodes = [...getNodes(container)];
  if (!nodes.length) {
    throw new Error('No nodes found');
  }

  // The page title may sit outside the detected container; translate it too.
  const titleNode = document.querySelector('h1');
  if (titleNode && !nodes.includes(titleNode)) {
    nodes.unshift(titleNode);
  }

  // '\n\n' separates nodes in the payload, so blank lines inside a node are
  // swapped for '^^^' on the way out and restored on the way back.
  const textToTranslate = nodes
    .map((node) => node.innerText.replace(/\n\n/g, '^^^'))
    .join('\n\n');
  const blocks = textToBlocks(textToTranslate);
  console.log('blocks', blocks.map((b) => b.length).join(', '));

  let initialIndex = 0;
  for (const [blockIndex, block] of blocks.entries()) {
    console.log(`Translating block ${blockIndex + 1}/${blocks.length}...`);
    const translatedText = await translate(block);
    console.log('Translated, copying nodes...');
    const translatedNodes = translatedText
      .split('\n\n')
      .map((item) => item.replace(/\^\^\^/g, '\n\n'));
    const len = translatedNodes.length;
    // Number of source paragraphs in this block; the index must advance by
    // this, not by however many paragraphs the translator returned.
    const sourceCount = block.split('\n\n').length;
    console.log('Check if length is equal', len, nodes.length, initialIndex);
    if (len !== sourceCount) {
      console.warn(
        `Block ${blockIndex + 1}: sent ${sourceCount} paragraphs, received ${len}`,
      );
    }

    const pairCount = Math.min(len, sourceCount, nodes.length - initialIndex);
    for (let offset = 0; offset < pairCount; offset++) {
      const [copiedNode] = copyNodes([nodes[initialIndex + offset]]);
      // Build the highlight with textContent so the translated text is never
      // parsed as HTML.
      const highlight = document.createElement('font');
      highlight.setAttribute(
        'style',
        'border-bottom:2px solid #72ECE9;vertical-align:inherit',
      );
      highlight.textContent = translatedNodes[offset];
      copiedNode.textContent = '';
      copiedNode.appendChild(highlight);
    }
    initialIndex += sourceCount;
    console.log('x initialIndex', initialIndex);

    // Pause 1-4 seconds between requests, but not after the last one.
    if (blockIndex + 1 < blocks.length) {
      const delayTime = 1000 + Math.floor(Math.random() * 3000);
      console.log(`delay ${delayTime}ms...`);
      await delay(delayTime);
    }
  }
})().catch((err) => {
  console.error('Translation failed', err);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment