New log file:
2020-12-03 14:28:52.473786: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2020-12-03 14:28:53.363827: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2020-12-03 14:28:53.405601: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-12-03 14:28:53.405628: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2020-12-03 14:28:53.406893: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-12-03 14:28:53.407822: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-12-03 14:28:53.408027: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-12-03 14:28:53.409325: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-12-03 14:28:53.410258: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-12-03 14:28:53.412850: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2020-12-03 14:28:53.413917: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
INFO:tensorflow:Using model:
(model): TransformerBase(
(examples_inputter): SequenceToSequenceInputter(
(features_inputter): WordEmbedder()
(labels_inputter): WordEmbedder()
(inputters): ListWrapper(
(0): WordEmbedder()
(1): WordEmbedder()
)
)
(encoder): SelfAttentionEncoder(
(position_encoder): SinusoidalPositionEncoder(
(reducer): SumReducer()
)
(layer_norm): LayerNorm()
(layers): ListWrapper(
(0): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(1): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(2): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(3): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(4): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(5): SelfAttentionEncoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
)
)
(decoder): SelfAttentionDecoder(
(position_encoder): SinusoidalPositionEncoder(
(reducer): SumReducer()
)
(layer_norm): LayerNorm()
(layers): ListWrapper(
(0): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(1): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(2): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(3): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(4): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(5): SelfAttentionDecoderLayer(
(self_attention): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
(attention): ListWrapper(
(0): TransformerLayerWrapper(
(layer): MultiHeadAttention(
(linear_queries): Dense(512)
(linear_keys): Dense(512)
(linear_values): Dense(512)
(linear_output): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
(ffn): TransformerLayerWrapper(
(layer): FeedForwardNetwork(
(inner): Dense(2048)
(outer): Dense(512)
)
(input_layer_norm): LayerNorm()
)
)
)
)
)
INFO:tensorflow:Using parameters:
data:
eval_features_file: path/data/src-val.txt
eval_labels_file: path/data/tgt-val.txt
source_tokenization: path/tok.yml
source_vocabulary: path/src-sp-vocab
target_tokenization: path/tok.yml
target_vocabulary: path/tgt-sp-vocab
train_features_file: path/data/src-train.txt
train_labels_file: path/data/tgt-train.txt
eval:
batch_size: 32
batch_type: examples
early_stopping:
metric: bleu
min_improvement: 0.2
steps: 4
export_on_best: bleu
external_evaluators:
- bleu
length_bucket_width: 5
save_eval_predictions: true
steps: 1000
infer:
batch_size: 32
batch_type: examples
length_bucket_width: 5
model_dir: path/
params:
average_loss_in_time: true
beam_width: 4
decay_params:
model_dim: 512
warmup_steps: 8000
decay_type: NoamDecay
dropout: 0.2
label_smoothing: 0.1
learning_rate: 2.0
num_hypotheses: 1
optimizer: LazyAdam
optimizer_params:
beta_1: 0.9
beta_2: 0.998
replace_unknown_target: true
score:
batch_size: 64
train:
average_last_checkpoints: 6
batch_size: 3072
batch_type: tokens
effective_batch_size: 25000
keep_checkpoint_max: 6
length_bucket_width: 1
max_step: 30000
maximum_features_length: 100
maximum_labels_length: 100
sample_buffer_size: -1
save_checkpoints_steps: 1000
save_summary_steps: 100
2020-12-03 14:28:53.677638: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2020-12-03 14:28:53.835254: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-12-03 14:28:53.840206: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 3000000000 Hz
2020-12-03 14:28:53.840407: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5b0ef10 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-12-03 14:28:53.840421: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2020-12-03 14:28:53.920386: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5465ac0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-12-03 14:28:53.920408: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-12-03 14:28:53.920995: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-12-03 14:28:53.921017: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2020-12-03 14:28:53.921032: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-12-03 14:28:53.921040: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-12-03 14:28:53.921048: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-12-03 14:28:53.921055: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-12-03 14:28:53.921064: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-12-03 14:28:53.921071: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2020-12-03 14:28:53.922113: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-12-03 14:28:53.922136: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2020-12-03 14:28:54.305002: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-12-03 14:28:54.305031: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0
2020-12-03 14:28:54.305040: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N
2020-12-03 14:28:54.306104: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 9983 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5)
WARNING:tensorflow:No checkpoint to restore in path/
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/summary/summary_iterator.py:68: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.
Instructions for updating:
Use eager execution and:
`tf.data.TFRecordDataset(path)`
INFO:tensorflow:Accumulate gradients of 9 iterations to reach effective batch size of 25000
INFO:tensorflow:Training on 1794178 examples
2020-12-03 14:28:57.682415: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.686997: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.693292: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.695026: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.698350: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.699687: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.702974: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.704627: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.707932: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.709246: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.711573: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:28:57.712686: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
INFO:tensorflow:Number of model parameters: 97639783
INFO:tensorflow:Number of model weights: 260 (trainable = 260, non trainable = 0)
2020-12-03 14:29:18.366021: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:29:18.729418: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:29:25.789581: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1924] Converted 2097/8021 nodes to float16 precision using 210 cast(s) to float16 (excluding Const and Variable casts)
2020-12-03 14:29:27.685399: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1924] Converted 0/7954 nodes to float16 precision using 0 cast(s) to float16 (excluding Const and Variable casts)
2020-12-03 14:29:28.791003: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:29:28.792977: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:29:28.795782: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:29:28.797444: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:29:28.933750: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-12-03 14:29:35.979950: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2020-12-03 14:30:04.096776: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:30:05.185197: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
INFO:tensorflow:Saved checkpoint path/ckpt-1
2020-12-03 14:32:26.439466: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:32:26.442823: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
INFO:tensorflow:Step = 100 ; steps/s = 0.47, source words/s = 11079, target words/s = 13193 ; Learning rate = 0.000012 ; Loss = 9.749232
INFO:tensorflow:Step = 200 ; steps/s = 0.78, source words/s = 18492, target words/s = 22009 ; Learning rate = 0.000025 ; Loss = 8.633177
INFO:tensorflow:Step = 300 ; steps/s = 0.78, source words/s = 18478, target words/s = 21991 ; Learning rate = 0.000037 ; Loss = 7.748647
INFO:tensorflow:Step = 400 ; steps/s = 0.78, source words/s = 18494, target words/s = 21994 ; Learning rate = 0.000050 ; Loss = 7.308655
INFO:tensorflow:Step = 500 ; steps/s = 0.78, source words/s = 18465, target words/s = 21979 ; Learning rate = 0.000062 ; Loss = 7.062311
INFO:tensorflow:Step = 600 ; steps/s = 0.78, source words/s = 18461, target words/s = 21974 ; Learning rate = 0.000074 ; Loss = 6.703609
INFO:tensorflow:Step = 700 ; steps/s = 0.78, source words/s = 18453, target words/s = 21956 ; Learning rate = 0.000087 ; Loss = 6.461774
INFO:tensorflow:Step = 800 ; steps/s = 0.78, source words/s = 18461, target words/s = 21960 ; Learning rate = 0.000099 ; Loss = 6.419743
INFO:tensorflow:Step = 900 ; steps/s = 0.78, source words/s = 18451, target words/s = 21958 ; Learning rate = 0.000111 ; Loss = 6.081983
2020-12-03 14:50:29.072789: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:50:29.075731: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:50:29.080340: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:50:29.082645: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:50:29.087326: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:50:29.089833: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:50:29.093954: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:50:29.095920: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
INFO:tensorflow:Step = 1000 ; steps/s = 0.78, source words/s = 17411, target words/s = 20656 ; Learning rate = 0.000124 ; Loss = 5.770612
INFO:tensorflow:Saved checkpoint path/ckpt-1000
INFO:tensorflow:Running evaluation for step 1000
2020-12-03 14:51:50.465996: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.468040: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.470948: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.472563: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.478207: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.484484: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.492122: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.494638: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.498928: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.501751: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.506327: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.508915: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:50.512053: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 14:51:58.075717: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1924] Converted 1877/5047 nodes to float16 precision using 10 cast(s) to float16 (excluding Const and Variable casts)
2020-12-03 14:51:59.340843: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1924] Converted 0/4972 nodes to float16 precision using 0 cast(s) to float16 (excluding Const and Variable casts)
INFO:tensorflow:Evaluation predictions saved to path/eval/predictions.txt.1000
INFO:tensorflow:Evaluation result for step 1000: loss = 5.128579 ; perplexity = 168.777054 ; bleu = 4.478457
INFO:tensorflow:Exporting model to path/export/1000 (best bleu so far: 4.478457)
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/tracking/tracking.py:111: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: path/export/1000/assets
INFO:tensorflow:Extra assets written to: path/export/1000/assets.extra
(.....)
INFO:tensorflow:Step = 19100 ; steps/s = 0.57, source words/s = 12604, target words/s = 14967 ; Learning rate = 0.000640 ; Loss = 2.022251
INFO:tensorflow:Step = 19200 ; steps/s = 0.78, source words/s = 18439, target words/s = 21935 ; Learning rate = 0.000638 ; Loss = 2.145678
INFO:tensorflow:Step = 19300 ; steps/s = 0.78, source words/s = 18432, target words/s = 21937 ; Learning rate = 0.000636 ; Loss = 2.331972
INFO:tensorflow:Step = 19400 ; steps/s = 0.78, source words/s = 18444, target words/s = 21945 ; Learning rate = 0.000635 ; Loss = 2.173255
INFO:tensorflow:Step = 19500 ; steps/s = 0.78, source words/s = 18435, target words/s = 21948 ; Learning rate = 0.000633 ; Loss = 2.056362
INFO:tensorflow:Step = 19600 ; steps/s = 0.78, source words/s = 18447, target words/s = 21947 ; Learning rate = 0.000631 ; Loss = 2.371855
INFO:tensorflow:Step = 19700 ; steps/s = 0.78, source words/s = 18443, target words/s = 21936 ; Learning rate = 0.000630 ; Loss = 2.321309
INFO:tensorflow:Step = 19800 ; steps/s = 0.78, source words/s = 18442, target words/s = 21948 ; Learning rate = 0.000628 ; Loss = 2.014471
INFO:tensorflow:Step = 19900 ; steps/s = 0.78, source words/s = 18442, target words/s = 21932 ; Learning rate = 0.000627 ; Loss = 2.336197
INFO:tensorflow:Step = 20000 ; steps/s = 0.82, source words/s = 18294, target words/s = 21696 ; Learning rate = 0.000625 ; Loss = 2.040690
INFO:tensorflow:Saved checkpoint path/ckpt-20000
INFO:tensorflow:Running evaluation for step 20000
2020-12-03 21:59:40.194580: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.197259: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.200636: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.202503: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.208499: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.215714: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.224798: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.227567: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.232539: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.235524: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.240517: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.242977: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
2020-12-03 21:59:40.246067: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1345] No whitelist ops found, nothing to do
INFO:tensorflow:Evaluation predictions saved to path/eval/predictions.txt.20000
INFO:tensorflow:Evaluation result for step 20000: loss = 0.873911 ; perplexity = 2.396265 ; bleu = 56.630961
(.....)
INFO:tensorflow:Using parameters:
data:
eval_features_file: path/data/src-val.txt
eval_labels_file: path/data/tgt-val.txt
source_tokenization: path/tok.yml
source_vocabulary: path/src-sp-vocab
target_tokenization: path/tok.yml
target_vocabulary: path/tgt-sp-vocab
train_features_file: path/data/src-train.txt
train_labels_file: path/data/tgt-train.txt
eval:
batch_size: 32
batch_type: examples
early_stopping:
metric: bleu
min_improvement: 0.2
steps: 4
export_on_best: bleu
external_evaluators:
- bleu
length_bucket_width: 5
save_eval_predictions: true
steps: 1000
infer:
batch_size: 32
batch_type: examples
length_bucket_width: 5
model_dir: path/
params:
average_loss_in_time: true
beam_width: 4
decay_params:
model_dim: 512
warmup_steps: 8000
decay_type: NoamDecay
dropout: 0.2
label_smoothing: 0.1
learning_rate: 2.0
num_hypotheses: 1
optimizer: LazyAdam
optimizer_params:
beta_1: 0.9
beta_2: 0.998
replace_unknown_target: true
score:
batch_size: 64
train:
average_last_checkpoints: 6
batch_size: 3072
batch_type: tokens
effective_batch_size: 25000
keep_checkpoint_max: 6
length_bucket_width: 1
max_step: 30000
maximum_features_length: 100
maximum_labels_length: 100
sample_buffer_size: -1
save_checkpoints_steps: 1000
save_summary_steps: 100
INFO:tensorflow:Restored checkpoint path/ckpt-30000
INFO:tensorflow:Averaging 6 checkpoints...
INFO:tensorflow:Reading checkpoint path/ckpt-25000...
INFO:tensorflow:Reading checkpoint path/ckpt-26000...
INFO:tensorflow:Reading checkpoint path/ckpt-27000...
INFO:tensorflow:Reading checkpoint path/ckpt-28000...
INFO:tensorflow:Reading checkpoint path/ckpt-29000...
INFO:tensorflow:Reading checkpoint path/ckpt-30000...
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.loss_scale
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.base_optimizer
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.loss_scale.current_loss_scale
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.loss_scale.good_steps
WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.