Greetings.
I am having trouble updating from OpenNMT-tf 2.18.1 to 2.27.1, using SequenceRecordInputter for the source.
My code works with OpenNMT-tf 2.18.1. When I re-run with OpenNMT-tf 2.27.1, TensorFlow 2.8.0, and Python 3.8, I get the error "Cannot convert a list containing a tensor of dtype <dtype: 'int32'> to <dtype: 'int64'>". I am re-generating the SequenceRecords with these updated versions.
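For context, the records are written along these lines (a trimmed sketch of my generation script; the file name, sequence length, and feature depth below are illustrative stand-ins for my real values):

import numpy as np
import tensorflow as tf

from opennmt.inputters import write_sequence_record

# Trimmed sketch: the real script reads dense feature vectors; the random
# array below just stands in for one of them.
with tf.io.TFRecordWriter("subtrain.ru.vtf.gz", options="GZIP") as writer:
    vector = np.random.rand(100, 24).astype(np.float32)  # shape [time, depth]
    write_sequence_record(vector, writer)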
Here is the log file from one of my training attempts:
OpenNMT-tf 2.27.1
2022-06-29 16:14:09.228000: I main.py:309] Using OpenNMT-tf version 2.27.1
2022-06-29 16:14:09.229000: I main.py:309] Using model:
(model): Transformer(
  (examples_inputter): SequenceToSequenceInputter(
    (features_inputter): SequenceRecordInputter()
    (labels_inputter): WordEmbedder()
    (inputters): ListWrapper(
      (0): SequenceRecordInputter()
      (1): WordEmbedder()
    )
  )
  (encoder): SelfAttentionEncoder(
    (layer_norm): LayerNorm()
    (layers): ListWrapper(
      (0): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (1): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (2): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (3): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (4): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (5): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (6): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
    )
  )
  (decoder): SelfAttentionDecoder(
    (layer_norm): LayerNorm()
    (layers): ListWrapper(
      (0): SelfAttentionDecoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (attention): ListWrapper(
          (0): TransformerLayerWrapper(
            (layer): MultiHeadAttention(
              (linear_queries): Dense(1024)
              (linear_keys): Dense(1024)
              (linear_values): Dense(1024)
              (linear_output): Dense(1024)
            )
            (input_layer_norm): LayerNorm()
          )
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (1): SelfAttentionDecoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (attention): ListWrapper(
          (0): TransformerLayerWrapper(
            (layer): MultiHeadAttention(
              (linear_queries): Dense(1024)
              (linear_keys): Dense(1024)
              (linear_values): Dense(1024)
              (linear_output): Dense(1024)
            )
            (input_layer_norm): LayerNorm()
          )
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (2): SelfAttentionDecoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (attention): ListWrapper(
          (0): TransformerLayerWrapper(
            (layer): MultiHeadAttention(
              (linear_queries): Dense(1024)
              (linear_keys): Dense(1024)
              (linear_values): Dense(1024)
              (linear_output): Dense(1024)
            )
            (input_layer_norm): LayerNorm()
          )
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (3): SelfAttentionDecoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (attention): ListWrapper(
          (0): TransformerLayerWrapper(
            (layer): MultiHeadAttention(
              (linear_queries): Dense(1024)
              (linear_keys): Dense(1024)
              (linear_values): Dense(1024)
              (linear_output): Dense(1024)
            )
            (input_layer_norm): LayerNorm()
          )
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (4): SelfAttentionDecoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (attention): ListWrapper(
          (0): TransformerLayerWrapper(
            (layer): MultiHeadAttention(
              (linear_queries): Dense(1024)
              (linear_keys): Dense(1024)
              (linear_values): Dense(1024)
              (linear_output): Dense(1024)
            )
            (input_layer_norm): LayerNorm()
          )
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (5): SelfAttentionDecoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (attention): ListWrapper(
          (0): TransformerLayerWrapper(
            (layer): MultiHeadAttention(
              (linear_queries): Dense(1024)
              (linear_keys): Dense(1024)
              (linear_values): Dense(1024)
              (linear_output): Dense(1024)
            )
            (input_layer_norm): LayerNorm()
          )
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
      (6): SelfAttentionDecoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(1024)
            (linear_keys): Dense(1024)
            (linear_values): Dense(1024)
            (linear_output): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
        (attention): ListWrapper(
          (0): TransformerLayerWrapper(
            (layer): MultiHeadAttention(
              (linear_queries): Dense(1024)
              (linear_keys): Dense(1024)
              (linear_values): Dense(1024)
              (linear_output): Dense(1024)
            )
            (input_layer_norm): LayerNorm()
          )
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(64)
            (outer): Dense(1024)
          )
          (input_layer_norm): LayerNorm()
        )
      )
    )
  )
)
2022-06-29 16:14:09.232201: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-29 16:14:10.264266: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30989 MB memory: -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:5e:00.0, compute capability: 7.0
2022-06-29 16:14:10.265538: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 30989 MB memory: -> device: 1, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:af:00.0, compute capability: 7.0
2022-06-29 16:14:10.272000: I main.py:318] Using parameters:
data:
  eval_features_file: REL_L7_N1_W20_M24_D0B/data/valid.ru.vtf.gz
  eval_labels_file: REL_L7_N1_W20_M24_D0B/data/valid.sp32k.en
  target_vocabulary: REL_L7_N1_W20_M24_D0B/data/wmt19-ruen-en-32k.onmt.vocab
  train_features_file: REL_L7_N1_W20_M24_D0B/data/subtrain.ru.vtf.gz
  train_labels_file: REL_L7_N1_W20_M24_D0B/data/subtrain.sp32k.en
eval:
  batch_size: 32
  batch_type: examples
  external_evaluators: BLEU
  length_bucket_width: 5
  steps: 1000000000
infer:
  batch_size: 32
  batch_type: examples
  length_bucket_width: 5
model_dir: REL_L7_N1_W20_M24_D0B
params:
  average_loss_in_time: true
  beam_width: 4
  decay_params:
    model_dim: 1024
    warmup_steps: 8000
  decay_type: NoamDecay
  label_smoothing: 0.1
  learning_rate: 2.0
  num_hypotheses: 1
  optimizer: LazyAdam
  optimizer_params:
    beta_1: 0.9
    beta_2: 0.998
score:
  batch_size: 64
  batch_type: examples
  length_bucket_width: 5
train:
  average_last_checkpoints: 8
  batch_size: 3072
  batch_type: tokens
  effective_batch_size: 25000
  keep_checkpoint_max: 8
  length_bucket_width: 1
  max_step: null
  maximum_features_length: 3000
  maximum_labels_length: 200
  sample_buffer_size: -1
  save_checkpoints_steps: 10000
  save_summary_steps: 100
  single_pass: true
2022-06-29 16:14:10.703000: I inputter.py:316] Initialized target input layer:
2022-06-29 16:14:10.703000: I inputter.py:316] - vocabulary size: 32001
2022-06-29 16:14:10.703000: I inputter.py:316] - special tokens: BOS=yes, EOS=yes
2022-06-29 16:14:10.706000: W runner.py:246] No checkpoint to restore in REL_L7_N1_W20_M24_D0B
2022-06-29 16:14:10.722000: W deprecation.py:337] From /opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/summary/summary_iterator.py:27: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.
Instructions for updating:
Use eager execution and:
tf.data.TFRecordDataset(path)
Traceback (most recent call last):
  File "/opennmt/OpenNMT-tf_v2.27.1/bin/onmt-main", line 11, in <module>
    load_entry_point('OpenNMT-tf', 'console_scripts', 'onmt-main')()
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/bin/main.py", line 318, in main
    runner.train(
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/runner.py", line 253, in train
    evaluator = evaluation.Evaluator.from_config(model, config)
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/evaluation.py", line 189, in from_config
    return cls(
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/evaluation.py", line 108, in __init__
    dataset = model.examples_inputter.make_evaluation_dataset(
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/inputters/inputter.py", line 687, in make_evaluation_dataset
    dataset = dataset.apply(
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2238, in apply
    dataset = transformation_func(self)
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/data/dataset.py", line 734, in _pipeline
    dataset = dataset.apply(
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2238, in apply
    dataset = transformation_func(self)
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/data/dataset.py", line 482, in <lambda>
    return lambda dataset: dataset.group_by_window(_key_func, _reduce_func, **kwargs)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2791, in group_by_window
    return _GroupByWindowDataset(
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5651, in __init__
    self._make_key_func(key_func, input_dataset)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5691, in _make_key_func
    self._key_func = structured_function.StructuredFunctionWrapper(
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__
    self._function = fn_factory()
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/eager/function.py", line 3070, in get_concrete_function
    graph_function = self._get_concrete_function_garbage_collected(
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/eager/function.py", line 3036, in _get_concrete_function_garbage_collected
    graph_function, _ = self._maybe_define_function(args, kwargs)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/eager/function.py", line 3292, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/eager/function.py", line 3130, in _create_graph_function
    func_graph_module.func_graph_from_py_func(
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1161, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py", line 248, in wrapped_fn
    ret = wrapper_helper(*args)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py", line 177, in wrapper_helper
    ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 689, in wrapper
    return converted_call(f, args, kwargs, options=options)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 377, in converted_call
    return _call_unconverted(f, args, kwargs, options)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 458, in _call_unconverted
    return f(*args, **kwargs)
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5689, in key_func_wrapper
    return ops.convert_to_tensor(key_func(*args), dtype=dtypes.int64)
  File "/opennmt/OpenNMT-tf_v2.27.1/opennmt/data/dataset.py", line 442, in _key_func
    bucket_id = tf.reduce_max(
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/opennmt/OpenNMT-tf_v2.27.1/lib64/python3.8/site-packages/tensorflow/python/ops/array_ops.py", line 1461, in _autopacking_helper
    raise TypeError(f"Cannot convert a list containing a tensor of dtype "
TypeError: Cannot convert a list containing a tensor of dtype <dtype: 'int32'> to <dtype: 'int64'> (Tensor is: <tf.Tensor 'Max_1:0' shape=() dtype=int32>)
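The failure seems to come from tf.reduce_max being handed a list that mixes int32 and int64 scalars. This standalone snippet is my own guess at a minimal reproduction (not code from OpenNMT-tf) and raises the same TypeError under TensorFlow 2.8:

import tensorflow as tf

# TF packs the list into one tensor with the first element's dtype (int64)
# and then fails on the int32 element, producing the same message as above.
length_a = tf.constant(7, dtype=tf.int64)   # e.g. a bucketed text length
length_b = tf.constant(12, dtype=tf.int32)  # e.g. a bucketed record length
bucket_id = tf.reduce_max([length_a, length_b])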
I could not find anything in the changelog that seems pertinent to this error. Let me know if other files (SequenceRecord creation, model file, etc.) would be of help!
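In the meantime, here is roughly what my model definition looks like (a sketch only; input_depth, embedding_size, and num_heads are placeholders, while the layer count and Dense sizes match the log above):

import opennmt

def model():
    # Transformer with dense feature vectors on the source side and
    # subword text on the target side, as in the logged architecture.
    return opennmt.models.Transformer(
        source_inputter=opennmt.inputters.SequenceRecordInputter(input_depth=24),
        target_inputter=opennmt.inputters.WordEmbedder(embedding_size=1024),
        num_layers=7,
        num_units=1024,
        num_heads=16,
        ffn_inner_dim=64,
    )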
Thanks,
Gerd