I used the code below to translate a sentence with the OpenNMT-py translation API:
from onmt.translate.translator import build_translator
from argparse import Namespace
import sentencepiece as spm
# Script purpose: tokenize one English sentence with SentencePiece, translate it
# with an OpenNMT-py translator built from a checkpoint, then detokenize and
# print the result.

# Load the SentencePiece model; the same processor is used below both to split
# the source sentence into pieces and to join the translated pieces back.
tokenizer = spm.SentencePieceProcessor()
tokenizer.Load('training-data/en.model')
# Translation options are supplied as a plain Namespace instead of being parsed
# from the command line.
# NOTE(review): ban_unk_token, tgt_file_prefix, gold_align and with_score are
# set to the string 'false', which is truthy in Python; if the library tests
# these flags for truthiness they would act as enabled — confirm whether the
# boolean False was intended.
opt = Namespace(models=['model_step_30000.pt'], n_best=1, alpha=0.0, batch_type='sents', beam_size=5, beta=-0.0, block_ngram_repeat=0, coverage_penalty='none', data_type='text', dump_beam='', fp32=False, gpu=-1, ignore_when_blocking=[], length_penalty='none', max_length=100, max_sent_length=None, min_length=0, output='/dev/null', phrase_table='', random_sampling_temp=1.0, random_sampling_topk=1, ratio=-0.0, replace_unk=False, report_align=False, report_time=False, seed=829, stepwise_penalty=False, tgt=None, verbose=False, quant_layers='w_1', quant_type='bnb_NF4', world_size=0, precision='fp16', random_sampling_topp=0.75, ban_unk_token='false', tgt_file_prefix='false', gold_align='false', with_score='false')
translator = build_translator(opt, report_score=False)
sentence = 'it works!'
# Split into subword pieces and re-join them with spaces, so the source is a
# single whitespace-separated string of pieces.
tokens = tokenizer.EncodeAsPieces(sentence)
sentence = " ".join(tokens)
# NOTE(review): this is the call that fails. The traceback reported for this
# script shows translate_batch evaluating batch["srclen"], i.e. indexing its
# first argument with a string key, whereas a list of strings is passed here.
# Presumably the installed OpenNMT-py version expects an already-built batch
# mapping rather than raw sentences — confirm against that version's API.
translated = translator.translate_batch([sentence], attn_debug=False)
# Assumes the result is indexable and that [1][0][0] is the best hypothesis as
# a space-separated piece string — TODO confirm against the installed version.
detokenized = tokenizer.DecodePieces(translated[1][0][0].split())
print(detokenized)
# Expected output: es funktioniert!
However, the following error occurred when I ran the code above:
Traceback (most recent call last):
File "/root/opennmt/opennmtAPI.py", line 13, in <module>
translated = translator.translate_batch([sentence], attn_debug=False)
File "/usr/local/lib/python3.9/site-packages/onmt/translate/translator.py", line 813, in translate_batch
batch_size=len(batch["srclen"]),
TypeError: list indices must be integers or slices, not str
Can anybody tell me what’s going on?