Traceback (most recent call last):
File "d:\anaconda3\lib\runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "d:\anaconda3\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "D:\Anaconda3\Scripts\onmt-build-vocab.exe\__main__.py", line 7, in <module>
File "d:\anaconda3\lib\site-packages\opennmt\bin\build_vocab.py", line 153, in main
vocab.add_from_text(data_file, tokenizer=tokenizer)
File "d:\anaconda3\lib\site-packages\opennmt\data\vocab.py", line 87, in add_from_text
for line in text:
File "d:\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 203, in __next__
retval = self.readline()
File "d:\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 167, in readline
self._preread_check()
File "d:\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 76, in _preread_check
self._read_buf = _pywrap_file_io.BufferedInputStream(
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 92: invalid continuation byte
The corpus is an ancient Chinese text.