I edited the parameters to set effective_batch_size = null. I now get an error after step 1000, when the first checkpoint is saved. The training time is shortened, though. Thanks.
tensorflow-gpu 1.13.1
Error:
Traceback (most recent call last):
File "***\anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1334, in _do_call
return fn(*args)
File "***\anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1319, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1407, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.OutOfRangeError: End of sequence
[[{{node IteratorGetNext}} = IteratorGetNext[output_shapes=[[?,?], [?], [?,?], [?,?], [?,?], [?], [?,?]], output_types=[DT_INT64, DT_INT32, DT_STRING, DT_INT64, DT_INT64, DT_INT32, DT_STRING], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorV2)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "***\anaconda3\lib\site-packages\tensorflow\python\training\evaluation.py", line 274, in _evaluate_once
session.run(eval_ops, feed_dict)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 671, in run
run_metadata=run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1156, in run
run_metadata=run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1255, in run
raise six.reraise(*original_exc_info)
File "***\anaconda3\lib\site-packages\six.py", line 693, in reraise
raise value
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1240, in run
return self._sess.run(*args, **kwargs)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1312, in run
run_metadata=run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1076, in run
return self._sess.run(*args, **kwargs)
File "***\anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 929, in run
run_metadata_ptr)
File "***\anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1152, in _run
feed_dict_tensor, options, run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1328, in _do_run
run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1348, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: End of sequence
[[node IteratorGetNext (defined at ***\anaconda3\lib\site-packages\opennmt\estimator.py:132) = IteratorGetNext[output_shapes=[[?,?], [?], [?,?], [?,?], [?,?], [?], [?,?]], output_types=[DT_INT64, DT_INT32, DT_STRING, DT_INT64, DT_INT64, DT_INT32, DT_STRING], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorV2)]]
Caused by op 'IteratorGetNext', defined at:
File "***\anaconda3\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "***\anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "***\Anaconda3\Scripts\onmt-main.exe\__main__.py", line 9, in <module>
sys.exit(main())
File "***\anaconda3\lib\site-packages\opennmt\bin\main.py", line 172, in main
runner.train_and_evaluate(checkpoint_path=args.checkpoint_path)
File "***\anaconda3\lib\site-packages\opennmt\runner.py", line 297, in train_and_evaluate
result = tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 471, in train_and_evaluate
return executor.run()
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 610, in run
return self.run_local()
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 711, in run_local
saving_listeners=saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 354, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1207, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1241, in _train_model_default
saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1471, in _train_with_estimator_spec
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 671, in run
run_metadata=run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1156, in run
run_metadata=run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1240, in run
return self._sess.run(*args, **kwargs)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1320, in run
run_metadata=run_metadata))
File "***\anaconda3\lib\site-packages\tensorflow\python\training\basic_session_run_hooks.py", line 582, in after_run
if self._save(run_context.session, global_step):
File "***\anaconda3\lib\site-packages\tensorflow\python\training\basic_session_run_hooks.py", line 607, in _save
if l.after_save(session, step):
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 517, in after_save
self._evaluate(global_step_value) # updates self.eval_result
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 537, in _evaluate
self._evaluator.evaluate_and_export())
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 912, in evaluate_and_export
hooks=self._eval_spec.hooks)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 478, in evaluate
return _evaluate()
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 460, in _evaluate
self._evaluate_build_graph(input_fn, hooks, checkpoint_path))
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1484, in _evaluate_build_graph
self._call_model_fn_eval(input_fn, self.config))
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1517, in _call_model_fn_eval
input_fn, model_fn_lib.ModeKeys.EVAL)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1075, in _get_features_and_labels_from_input_fn
self._call_input_fn(input_fn, mode))
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1162, in _call_input_fn
return input_fn(**kwargs)
File "***\anaconda3\lib\site-packages\opennmt\estimator.py", line 132, in _fn
return iterator.get_next()
File "***\anaconda3\lib\site-packages\tensorflow\python\data\ops\iterator_ops.py", line 421, in get_next
name=name)), self._output_types,
File "***\anaconda3\lib\site-packages\tensorflow\python\ops\gen_dataset_ops.py", line 2069, in iterator_get_next
output_shapes=output_shapes, name=name)
File "***\anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "***\anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "***\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
op_def=op_def)
File "***\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
self._traceback = tf_stack.extract_stack()
OutOfRangeError (see above for traceback): End of sequence
[[node IteratorGetNext (defined at ***\anaconda3\lib\site-packages\opennmt\estimator.py:132) = IteratorGetNext[output_shapes=[[?,?], [?], [?,?], [?,?], [?,?], [?], [?,?]], output_types=[DT_INT64, DT_INT32, DT_STRING, DT_INT64, DT_INT64, DT_INT32, DT_STRING], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorV2)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "***\anaconda3\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "***\anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "***\Anaconda3\Scripts\onmt-main.exe\__main__.py", line 9, in <module>
File "***\anaconda3\lib\site-packages\opennmt\bin\main.py", line 172, in main
runner.train_and_evaluate(checkpoint_path=args.checkpoint_path)
File "***\anaconda3\lib\site-packages\opennmt\runner.py", line 297, in train_and_evaluate
result = tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 471, in train_and_evaluate
return executor.run()
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 610, in run
return self.run_local()
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 711, in run_local
saving_listeners=saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 354, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1207, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1241, in _train_model_default
saving_listeners)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1471, in _train_with_estimator_spec
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 671, in run
run_metadata=run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1156, in run
run_metadata=run_metadata)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1255, in run
raise six.reraise(*original_exc_info)
File "***\anaconda3\lib\site-packages\six.py", line 693, in reraise
raise value
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1240, in run
return self._sess.run(*args, **kwargs)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1320, in run
run_metadata=run_metadata))
File "***\anaconda3\lib\site-packages\tensorflow\python\training\basic_session_run_hooks.py", line 582, in after_run
if self._save(run_context.session, global_step):
File "***\anaconda3\lib\site-packages\tensorflow\python\training\basic_session_run_hooks.py", line 607, in _save
if l.after_save(session, step):
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 517, in after_save
self._evaluate(global_step_value) # updates self.eval_result
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 537, in _evaluate
self._evaluator.evaluate_and_export())
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 912, in evaluate_and_export
hooks=self._eval_spec.hooks)
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 478, in evaluate
return _evaluate()
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 467, in _evaluate
output_dir=self.eval_dir(name))
File "***\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1591, in _evaluate_run
config=self._session_config)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\evaluation.py", line 274, in _evaluate_once
session.run(eval_ops, feed_dict)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 783, in __exit__
self._close_internal(exception_type)
File "***\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 816, in _close_internal
h.end(self._coordinated_creator.tf_sess)
File "***\anaconda3\lib\site-packages\opennmt\utils\hooks.py", line 266, in end
self._post_evaluation_fn(self._current_step, self._output_path)
File "***\anaconda3\lib\site-packages\opennmt\utils\evaluator.py", line 40, in __call__
score = scorer(self._labels_file, predictions_path)
File "***\anaconda3\lib\site-packages\opennmt\utils\evaluator.py", line 151, in __call__
stderr=subprocess.STDOUT)
File "***\anaconda3\lib\subprocess.py", line 336, in check_output
**kwargs).stdout
File "***\anaconda3\lib\subprocess.py", line 403, in run
with Popen(*popenargs, **kwargs) as process:
File "***\anaconda3\lib\subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "***\anaconda3\lib\subprocess.py", line 997, in _execute_child
startupinfo)
OSError: [WinError 193] %1 is not a valid Win32 application