eileen-code4fun · January 21, 2022 06:03
diff --git a/translation_prepro.py b/translation_prepro.py
 def standardize(text):
    """Clean up raw text and wrap it in ``[START]`` / ``[END]`` markers.

    Steps, in order: NFKD-normalize, lowercase, drop every character that is
    not a space, ``a``-``z`` or one of ``. ? ! , ¿``, pad the kept
    punctuation with spaces, strip outer whitespace, then join with the
    ``[START]`` and ``[END]`` markers using a single space separator.

    Args:
        text: Input passed straight to ``tf_text.normalize_utf8``
            (presumably a string tensor -- confirm against callers).

    Returns:
        The result of ``tf.strings.join`` over the markers and cleaned text.
    """
    # NFKD splits accented characters, so the filter below keeps the plain
    # a-z letter and discards whatever else the decomposition produced.
    decomposed = tf_text.normalize_utf8(text, 'NFKD')
    lowered = tf.strings.lower(decomposed)

    # Whitelist: space, a to z, and the selected punctuation marks.
    filtered = tf.strings.regex_replace(lowered, '[^ a-z.?!,¿]', '')

    # `\0` is the whole match, so each punctuation mark gains a space on
    # both sides.
    spaced = tf.strings.regex_replace(filtered, '[.?!,¿]', r' \0 ')
    trimmed = tf.strings.strip(spaced)

    return tf.strings.join(['[START]', trimmed, '[END]'], separator=' ')
  
 # English side: vectorizer that applies `standardize` and is limited to
 # max_tokens=5000, adapted on the English dataset in batches of 128.
 # (`eng_dataset` is defined outside this section.)
 eng_text_processor = tf.keras.layers.TextVectorization(
     standardize=standardize,
     max_tokens=5000,
 )
 eng_text_processor.adapt(eng_dataset.batch(128))

 # Spanish side: same configuration, adapted on the Spanish dataset.
 # (`spa_dataset` is defined outside this section.)
 spa_text_processor = tf.keras.layers.TextVectorization(
     standardize=standardize,
     max_tokens=5000,
 )
 spa_text_processor.adapt(spa_dataset.batch(128))
	def standardize(text):
		"""Normalize raw text and wrap it in ``[START]`` / ``[END]`` markers.

		The text is NFKD-normalized, lowercased, filtered down to spaces,
		``a``-``z`` and the punctuation ``. ? ! , ¿``, given spaces around
		each kept punctuation mark, and stripped of outer whitespace before
		the markers are joined on with a single space.

		Args:
			text: Input passed straight to ``tf_text.normalize_utf8``
				(presumably a string tensor -- confirm against callers).

		Returns:
			The result of ``tf.strings.join`` over ``[START]``, the cleaned
			text and ``[END]``.
		"""
		# Split accented characters.
		text = tf_text.normalize_utf8(text, 'NFKD')
		text = tf.strings.lower(text)
		# Keep space, a to z, and select punctuation.
		text = tf.strings.regex_replace(text, '[^ a-z.?!,¿]', '')
		# Add spaces around punctuation (`\0` is the whole match).
		text = tf.strings.regex_replace(text, '[.?!,¿]', r' \0 ')
		# Strip whitespace.
		text = tf.strings.strip(text)

		text = tf.strings.join(['[START]', text, '[END]'], separator=' ')
		return text

	# One vectorizer per language; both apply `standardize` and are limited
	# to max_tokens=5000.
	eng_text_processor = tf.keras.layers.TextVectorization(
		standardize=standardize,
		max_tokens=5000,
	)
	spa_text_processor = tf.keras.layers.TextVectorization(
		standardize=standardize,
		max_tokens=5000,
	)

	# Adapt each vectorizer on its own dataset, fed in batches of 128.
	# (`eng_dataset` / `spa_dataset` are defined outside this section.)
	eng_text_processor.adapt(eng_dataset.batch(128))
	spa_text_processor.adapt(spa_dataset.batch(128))
No results found