klappy · December 1, 2019 20:45
diff --git a/sentence_tokens.js b/sentence_tokens.js
 let XRegExp = require('xregexp');
 // Demo script: splits a sentence into word tokens and rebuilds it as HTML,
 // wrapping every token in <span>…</span> while keeping the separators
 // (spaces, hyphens, punctuation) between the spans untouched.

 // Matches a character that is neither a Unicode letter (\pL) nor a
 // combining mark (\pM). Because the quantifier is lazy (`+?`), each match
 // consumes a single character, so adjacent separators such as ", " are
 // split one at a time and leave an empty-string token between them.
 const nonUnicodeLetter = XRegExp('[^\\pL\\pM]+?');

 const sentence = "This is a sentence-with some punctuation, and it will be split-up.";
 console.log("sentence: ", sentence);

 // NOTE: the result contains '' entries (between adjacent separators and
 // after a trailing separator); they are rendered below as empty spans.
 const tokens = sentence.split(nonUnicodeLetter);
 console.log("tokens: ", tokens);

 // Part of the sentence that has not been consumed yet. Declared locally:
 // the bare assignment used previously created an implicit global.
 let remaining = sentence;
 let response = "";
 tokens.forEach(function (token) {
   // Group 1 = separator text preceding the token, group 2 = the token.
   // Tokens consist only of letters/marks (everything else was a split
   // separator), so they hold no regex metacharacters to escape.
   const regex = XRegExp('^(.*?)(' + token + ')');
   const match = remaining.match(regex);
   // Drop the consumed prefix so the next token is searched after this one.
   remaining = remaining.replace(regex, '');
   console.log("token: ", token);
   console.log("match: ", match);
   response = response + match[1] + '<span>' + match[2] + '</span>';
 });
 // Whatever is left after the last token (e.g. the final ".") is appended as-is.
 response = "<div>" + response + remaining + "</div>";
 console.log("response: ", response);

 // sentence:  This is a sentence-with some punctuation, and it will be split-up.
 // tokens:  [ 'This',
 //   'is',
 //   'a',
 //   'sentence',
 //   'with',
 //   'some',
 //   'punctuation',
 //   '',
 //   'and',
 //   'it',
 //   'will',
 //   'be',
 //   'split',
 //   'up',
 //   '' ]
 // token:  This
 // match:  [ 'This',
 //   '',
 //   'This',
 //   index: 0,
 //   input: 'This is a sentence-with some punctuation, and it will be split-up.' ]
 // token:  is
 // match:  [ ' is',
 //   ' ',
 //   'is',
 //   index: 0,
 //   input: ' is a sentence-with some punctuation, and it will be split-up.' ]
 // token:  a
 // match:  [ ' a',
 //   ' ',
 //   'a',
 //   index: 0,
 //   input: ' a sentence-with some punctuation, and it will be split-up.' ]
 // token:  sentence
 // match:  [ ' sentence',
 //   ' ',
 //   'sentence',
 //   index: 0,
 //   input: ' sentence-with some punctuation, and it will be split-up.' ]
 // token:  with
 // match:  [ '-with',
 //   '-',
 //   'with',
 //   index: 0,
 //   input: '-with some punctuation, and it will be split-up.' ]
 // token:  some
 // match:  [ ' some',
 //   ' ',
 //   'some',
 //   index: 0,
 //   input: ' some punctuation, and it will be split-up.' ]
 // token:  punctuation
 // match:  [ ' punctuation',
 //   ' ',
 //   'punctuation',
 //   index: 0,
 //   input: ' punctuation, and it will be split-up.' ]
 // token:  
 // match:  [ '', '', '', index: 0, input: ', and it will be split-up.' ]
 // token:  and
 // match:  [ ', and',
 //   ', ',
 //   'and',
 //   index: 0,
 //   input: ', and it will be split-up.' ]
 // token:  it
 // match:  [ ' it', ' ', 'it', index: 0, input: ' it will be split-up.' ]
 // token:  will
 // match:  [ ' will', ' ', 'will', index: 0, input: ' will be split-up.' ]
 // token:  be
 // match:  [ ' be', ' ', 'be', index: 0, input: ' be split-up.' ]
 // token:  split
 // match:  [ ' split', ' ', 'split', index: 0, input: ' split-up.' ]
 // token:  up
 // match:  [ '-up', '-', 'up', index: 0, input: '-up.' ]
 // token:  
 // match:  [ '', '', '', index: 0, input: '.' ]
 // response:  <div><span>This</span> <span>is</span> <span>a</span> <span>sentence</span>-<span>with</span> <span>some</span> <span>punctuation</span><span></span>, <span>and</span> <span>it</span> <span>will</span> <span>be</span> <span>split</span>-<span>up</span><span></span>.</div>
	let XRegExp = require('xregexp');
	// Demo script: splits a sentence into word tokens and rebuilds it as HTML,
	// wrapping every token in <span>…</span> while keeping the separators
	// (spaces, hyphens, punctuation) between the spans untouched.

	// Matches a character that is neither a Unicode letter (\pL) nor a
	// combining mark (\pM). Because the quantifier is lazy (`+?`), each match
	// consumes a single character, so adjacent separators such as ", " are
	// split one at a time and leave an empty-string token between them.
	const nonUnicodeLetter = XRegExp('[^\\pL\\pM]+?');

	const sentence = "This is a sentence-with some punctuation, and it will be split-up.";
	console.log("sentence: ", sentence);

	// NOTE: the result contains '' entries (between adjacent separators and
	// after a trailing separator); they are rendered below as empty spans.
	const tokens = sentence.split(nonUnicodeLetter);
	console.log("tokens: ", tokens);

	// Part of the sentence that has not been consumed yet. Declared locally:
	// the bare assignment used previously created an implicit global.
	let remaining = sentence;
	let response = "";
	tokens.forEach(function (token) {
		// Group 1 = separator text preceding the token, group 2 = the token.
		// Tokens consist only of letters/marks (everything else was a split
		// separator), so they hold no regex metacharacters to escape.
		const regex = XRegExp('^(.*?)(' + token + ')');
		const match = remaining.match(regex);
		// Drop the consumed prefix so the next token is searched after this one.
		remaining = remaining.replace(regex, '');
		console.log("token: ", token);
		console.log("match: ", match);
		response = response + match[1] + '<span>' + match[2] + '</span>';
	});
	// Whatever is left after the last token (e.g. the final ".") is appended as-is.
	response = "<div>" + response + remaining + "</div>";
	console.log("response: ", response);

	// sentence: This is a sentence-with some punctuation, and it will be split-up.
	// tokens: [ 'This',
	// 'is',
	// 'a',
	// 'sentence',
	// 'with',
	// 'some',
	// 'punctuation',
	// '',
	// 'and',
	// 'it',
	// 'will',
	// 'be',
	// 'split',
	// 'up',
	// '' ]
	// token: This
	// match: [ 'This',
	// '',
	// 'This',
	// index: 0,
	// input: 'This is a sentence-with some punctuation, and it will be split-up.' ]
	// token: is
	// match: [ ' is',
	// ' ',
	// 'is',
	// index: 0,
	// input: ' is a sentence-with some punctuation, and it will be split-up.' ]
	// token: a
	// match: [ ' a',
	// ' ',
	// 'a',
	// index: 0,
	// input: ' a sentence-with some punctuation, and it will be split-up.' ]
	// token: sentence
	// match: [ ' sentence',
	// ' ',
	// 'sentence',
	// index: 0,
	// input: ' sentence-with some punctuation, and it will be split-up.' ]
	// token: with
	// match: [ '-with',
	// '-',
	// 'with',
	// index: 0,
	// input: '-with some punctuation, and it will be split-up.' ]
	// token: some
	// match: [ ' some',
	// ' ',
	// 'some',
	// index: 0,
	// input: ' some punctuation, and it will be split-up.' ]
	// token: punctuation
	// match: [ ' punctuation',
	// ' ',
	// 'punctuation',
	// index: 0,
	// input: ' punctuation, and it will be split-up.' ]
	// token:
	// match: [ '', '', '', index: 0, input: ', and it will be split-up.' ]
	// token: and
	// match: [ ', and',
	// ', ',
	// 'and',
	// index: 0,
	// input: ', and it will be split-up.' ]
	// token: it
	// match: [ ' it', ' ', 'it', index: 0, input: ' it will be split-up.' ]
	// token: will
	// match: [ ' will', ' ', 'will', index: 0, input: ' will be split-up.' ]
	// token: be
	// match: [ ' be', ' ', 'be', index: 0, input: ' be split-up.' ]
	// token: split
	// match: [ ' split', ' ', 'split', index: 0, input: ' split-up.' ]
	// token: up
	// match: [ '-up', '-', 'up', index: 0, input: '-up.' ]
	// token:
	// match: [ '', '', '', index: 0, input: '.' ]
	// response: <div><span>This</span> <span>is</span> <span>a</span> <span>sentence</span>-<span>with</span> <span>some</span> <span>punctuation</span><span></span>, <span>and</span> <span>it</span> <span>will</span> <span>be</span> <span>split</span>-<span>up</span><span></span>.</div>