Last active
December 1, 2019 20:45
-
-
Save klappy/447e461d7685f45866ca337c2b1e6e02 to your computer and use it in GitHub Desktop.
Wrap each Unicode word token in a sentence with an HTML span, without losing any punctuation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const XRegExp = require('xregexp');

// A word is a maximal run of Unicode letters (\pL) and combining marks (\pM).
// The capturing group makes String.prototype.split keep the matched words in
// its result, interleaved with the separator text found between them.
const wordPattern = XRegExp('([\\pL\\pM]+)');

/**
 * Wraps every word of `text` in a <span> and the whole result in a <div>,
 * keeping all whitespace and punctuation between the words exactly as written.
 *
 * NOTE: separator text is copied into the markup verbatim (no HTML escaping),
 * so escape characters such as `<` and `&` first if `text` is untrusted.
 *
 * @param {string} text - The sentence to tokenize.
 * @returns {{tokens: string[], html: string}} The words in order of
 *   appearance, and the generated markup.
 */
function wrapWordTokens(text) {
  // Splitting on a capturing pattern yields [sep, word, sep, word, ..., sep]:
  // even indexes hold separators (possibly empty), odd indexes hold words.
  const parts = text.split(wordPattern);
  const isWord = (index) => index % 2 === 1;

  const tokens = parts.filter((part, index) => isWord(index));
  const body = parts
    .map((part, index) => (isWord(index) ? `<span>${part}</span>` : part))
    .join('');

  return { tokens, html: `<div>${body}</div>` };
}

const sentence = 'This is a sentence-with some punctuation, and it will be split-up.';
console.log("sentence: ", sentence);

const { tokens, html: response } = wrapWordTokens(sentence);
console.log("tokens: ", tokens);
console.log("response: ", response);

// Expected output (the exact array layout depends on the Node.js version):
//
// sentence:  This is a sentence-with some punctuation, and it will be split-up.
// tokens:  [ 'This',
//   'is',
//   'a',
//   'sentence',
//   'with',
//   'some',
//   'punctuation',
//   'and',
//   'it',
//   'will',
//   'be',
//   'split',
//   'up' ]
// response:  <div><span>This</span> <span>is</span> <span>a</span> <span>sentence</span>-<span>with</span> <span>some</span> <span>punctuation</span>, <span>and</span> <span>it</span> <span>will</span> <span>be</span> <span>split</span>-<span>up</span>.</div>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment