Skip to content

Instantly share code, notes, and snippets.

@duvallj
Created October 3, 2021 02:12
Show Gist options
  • Save duvallj/862de61108806f55a6db0fdf607a9280 to your computer and use it in GitHub Desktop.
OpenNMT training failure log
2021-10-02 22:09:39.109000: I main.py:299] Using OpenNMT-tf version 2.22.0
2021-10-02 22:09:39.109000: I main.py:299] Using model:
(model): TransformerBase(
(examples_inputter): SequenceToSequenceInputter(
(features_inputter): WordEmbedder()
(labels_inputter): WordEmbedder()
(inputters): ListWrapper(
(0): WordEmbedder()
(1): WordEmbedder()
)
)
(encoder): SelfAttentionEncoder(
(position_encoder): SinusoidalPositionEncoder(
(reducer): SumReducer()
)
(layer_norm): LayerNorm()
(layers): ListWrapper(
(0): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(1): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(2): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(3): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(4): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(5): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
)
)
(decoder): SelfAttentionDecoder(
(position_encoder): SinusoidalPositionEncoder(
(reducer): SumReducer()
)
(layer_norm): LayerNorm()
(layers): ListWrapper(
(0): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(1): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(2): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(3): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(4): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(5): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
)
)
)
2021-10-02 22:09:39.115346: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-02 22:09:39.635124: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6633 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1
2021-10-02 22:09:39.639000: I main.py:308] Using parameters:
data:
eval_features_file: src-val.txt
eval_labels_file: tgt-val.txt
source_vocabulary: src-vocab.txt
target_vocabulary: tgt-vocab.txt
train_features_file: src-train.txt
train_labels_file: tgt-train.txt
eval:
batch_size: 32
batch_type: examples
length_bucket_width: 5
infer:
batch_size: 32
batch_type: examples
length_bucket_width: 5
model_dir: run/
params:
average_loss_in_time: true
beam_width: 4
decay_params:
model_dim: 512
warmup_steps: 8000
decay_type: NoamDecay
label_smoothing: 0.1
learning_rate: 2.0
num_hypotheses: 1
optimizer: LazyAdam
optimizer_params:
beta_1: 0.9
beta_2: 0.998
score:
batch_size: 64
batch_type: examples
length_bucket_width: 5
train:
average_last_checkpoints: 8
batch_size: 3072
batch_type: tokens
effective_batch_size: 25000
keep_checkpoint_max: 8
length_bucket_width: 1
max_step: 500000
maximum_features_length: 100
maximum_labels_length: 100
sample_buffer_size: -1
save_summary_steps: 100
2021-10-02 22:09:40.030000: I inputter.py:307] Initialized source input layer:
2021-10-02 22:09:40.031000: I inputter.py:307] - vocabulary size: 24998
2021-10-02 22:09:40.031000: I inputter.py:307] - special tokens: BOS=no, EOS=no
2021-10-02 22:09:40.139000: I inputter.py:307] Initialized target input layer:
2021-10-02 22:09:40.139000: I inputter.py:307] - vocabulary size: 35820
2021-10-02 22:09:40.139000: I inputter.py:307] - special tokens: BOS=yes, EOS=yes
2021-10-02 22:09:40.143000: W runner.py:243] No checkpoint to restore in run/
2021-10-02 22:09:40.155000: W deprecation.py:339] From d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\summary\summary_iterator.py:31: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.
Instructions for updating:
Use eager execution and:
`tf.data.TFRecordDataset(path)`
2021-10-02 22:09:41.265000: I main.py:308] Accumulate gradients of 9 iterations to reach effective batch size of 25000
2021-10-02 22:09:41.285000: I dataset_ops.py:2075] Training on 10000 examples
2021-10-02 22:09:41.918145: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2021-10-02 22:10:03.535306: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8101
2021-10-02 22:10:04.791625: W tensorflow/core/framework/op_kernel.cc:1680] Invalid argument: UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
Traceback (most recent call last):
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\ops\script_ops.py", line 249, in __call__
ret = func(*args)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 645, in wrapper
return func(*args, **kwargs)
File "C:\Users\Me\AppData\Local\Temp\tmptxqn1apx.py", line 46, in _record
ag__.for_stmt(ag__.converted_call(ag__.ld(oov_tokens).flatten, (), None, fscope_1), None, loop_body, get_state, set_state, (), {'iterate_names': 'oov_token'})
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 442, in for_stmt
_py_for_stmt(iter_, extra_test, body, None, None)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 471, in _py_for_stmt
body(target)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 457, in protected_body
original_body(protected_iter)
File "C:\Users\Me\AppData\Local\Temp\tmptxqn1apx.py", line 44, in loop_body
ag__.ld(all_oov_tokens)[ag__.converted_call(ag__.ld(oov_token).decode, ('utf-8',), None, fscope_1)] += 1
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 340, in converted_call
return _call_unconverted(f, args, kwargs, options, False)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 464, in _call_unconverted
return f(*args)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
Traceback (most recent call last):
File "C:\Python38\lib\runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Python38\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "D:\Documents\CMU\10-417\project_workspace\venv\Scripts\onmt-main.exe\__main__.py", line 7, in <module>
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\bin\main.py", line 308, in main
runner.train(
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\runner.py", line 276, in train
summary = trainer(
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\training.py", line 121, in __call__
for i, loss in enumerate(
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\training.py", line 262, in _steps
loss = forward_fn()
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\eager\def_function.py", line 885, in __call__
result = self._call(*args, **kwds)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\eager\def_function.py", line 917, in _call
return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\eager\function.py", line 3039, in __call__
return graph_function._call_flat(
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\eager\function.py", line 1963, in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\eager\function.py", line 591, in call
outputs = execute.execute(
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\eager\execute.py", line 59, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
Traceback (most recent call last):
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\ops\script_ops.py", line 249, in __call__
ret = func(*args)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 645, in wrapper
return func(*args, **kwargs)
File "C:\Users\Me\AppData\Local\Temp\tmptxqn1apx.py", line 46, in _record
ag__.for_stmt(ag__.converted_call(ag__.ld(oov_tokens).flatten, (), None, fscope_1), None, loop_body, get_state, set_state, (), {'iterate_names': 'oov_token'})
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 442, in for_stmt
_py_for_stmt(iter_, extra_test, body, None, None)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 471, in _py_for_stmt
body(target)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 457, in protected_body
original_body(protected_iter)
File "C:\Users\Me\AppData\Local\Temp\tmptxqn1apx.py", line 44, in loop_body
ag__.ld(all_oov_tokens)[ag__.converted_call(ag__.ld(oov_token).decode, ('utf-8',), None, fscope_1)] += 1
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 340, in converted_call
return _call_unconverted(f, args, kwargs, options, False)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 464, in _call_unconverted
return f(*args)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
[[node PyFunc_1 (defined at d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\training.py:721) ]]
(1) Invalid argument: UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
Traceback (most recent call last):
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\ops\script_ops.py", line 249, in __call__
ret = func(*args)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 645, in wrapper
return func(*args, **kwargs)
File "C:\Users\Me\AppData\Local\Temp\tmptxqn1apx.py", line 46, in _record
ag__.for_stmt(ag__.converted_call(ag__.ld(oov_tokens).flatten, (), None, fscope_1), None, loop_body, get_state, set_state, (), {'iterate_names': 'oov_token'})
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 442, in for_stmt
_py_for_stmt(iter_, extra_test, body, None, None)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 471, in _py_for_stmt
body(target)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 457, in protected_body
original_body(protected_iter)
File "C:\Users\Me\AppData\Local\Temp\tmptxqn1apx.py", line 44, in loop_body
ag__.ld(all_oov_tokens)[ag__.converted_call(ag__.ld(oov_token).decode, ('utf-8',), None, fscope_1)] += 1
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 340, in converted_call
return _call_unconverted(f, args, kwargs, options, False)
File "d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 464, in _call_unconverted
return f(*args)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
[[node PyFunc_1 (defined at d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\training.py:721) ]]
[[gradients/global_norm/write_summary/summary_cond/then/_267/gradients/global_norm/write_summary/_59]]
0 successful operations.
0 derived errors ignored. [Op:__inference__forward_32442]
Errors may have originated from an input operation.
Input Source operations connected to node PyFunc_1:
GatherNd_1 (defined at d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\inputters\text_inputter.py:384)
Sum_5 (defined at d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\training.py:716)
Input Source operations connected to node PyFunc_1:
GatherNd_1 (defined at d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\inputters\text_inputter.py:384)
Sum_5 (defined at d:\documents\cmu\10-417\project_workspace\venv\lib\site-packages\opennmt\training.py:716)
Function call stack:
_forward -> _forward
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment