How to add a converter for XLMRoberta

Hey,

I want to use XLMRoberta with CTranslate2, so I tried to write a converter based on the Bert example, but my converted model does not produce the same output as the original model. Maybe you can help here. By the way, it would be nice to know where you find the information needed to write a converter; I tried to read the paper but wasn't very successful.

Here is my attempt:

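# Added next to the existing BertLoader in ctranslate2/converters/transformers.py,
# which already defines register_loader, ModelLoader, utils, the spec modules,
# and _SUPPORTED_ACTIVATIONS.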
@register_loader("XLMRobertaConfig")
class XLMRobertaLoader(ModelLoader):
    @property
    def architecture_name(self):
        return "XLMRobertaModel"

    def get_model_spec(self, model):
        # This loader only handles absolute position embeddings.
        assert model.config.position_embedding_type == "absolute"

        encoder_spec = transformer_spec.TransformerEncoderSpec(
            model.config.num_hidden_layers,
            model.config.num_attention_heads,
            pre_norm=False,
            activation=_SUPPORTED_ACTIVATIONS[model.config.hidden_act],
            layernorm_embedding=True,
            num_source_embeddings=1,
        )

        spec = transformer_spec.TransformerEncoderModelSpec(
            encoder_spec,
            pooling_layer=True,
            pooling_activation=common_spec.Activation.Tanh,
        )

        # BERT-like models do not scale the embeddings by sqrt(hidden_size).
        spec.encoder.scale_embeddings = False

        self.set_embeddings(
            spec.encoder.embeddings[0], model.embeddings.word_embeddings
        )
        self.set_position_encodings(
            spec.encoder.position_encodings, model.embeddings.position_embeddings
        )
        self.set_layer_norm(
            spec.encoder.layernorm_embedding, model.embeddings.LayerNorm
        )

        self.set_linear(spec.pooler_dense, model.pooler.dense)

        for layer_spec, layer in zip(spec.encoder.layer, model.encoder.layer):
            # CTranslate2 fuses the query/key/value projections into a single
            # linear layer.
            split_layers = [common_spec.LinearSpec() for _ in range(3)]
            self.set_linear(split_layers[0], layer.attention.self.query)
            self.set_linear(split_layers[1], layer.attention.self.key)
            self.set_linear(split_layers[2], layer.attention.self.value)
            utils.fuse_linear(layer_spec.self_attention.linear[0], split_layers)

            self.set_linear(
                layer_spec.self_attention.linear[1], layer.attention.output.dense
            )
            self.set_layer_norm(
                layer_spec.self_attention.layer_norm, layer.attention.output.LayerNorm
            )

            self.set_linear(layer_spec.ffn.linear_0, layer.intermediate.dense)
            self.set_linear(layer_spec.ffn.linear_1, layer.output.dense)
            self.set_layer_norm(layer_spec.ffn.layer_norm, layer.output.LayerNorm)

        return spec

    def set_vocabulary(self, spec, tokens):
        spec.register_vocabulary(tokens)

    def set_config(self, config, model, tokenizer):
        config.unk_token = tokenizer.unk_token
        config.layer_norm_epsilon = model.config.layer_norm_eps

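For reference, here is roughly how I convert the model and compare the two outputs (a minimal sketch; "xlm_roberta_ct2" is just a placeholder directory):

import numpy as np
import torch
import transformers

import ctranslate2
from ctranslate2.converters import TransformersConverter

model_name = "xlm-roberta-base"
output_dir = "xlm_roberta_ct2"

# Convert using the loader registered above.
TransformersConverter(model_name).convert(output_dir, force=True)

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModel.from_pretrained(model_name)

inputs = tokenizer("Hello world!", return_tensors="pt")
with torch.no_grad():
    ref = model(**inputs).last_hidden_state.numpy()

encoder = ctranslate2.Encoder(output_dir, device="cpu")
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())
output = encoder.forward_batch([tokens])
hyp = np.array(output.last_hidden_state)

# The two outputs should match up to small numerical differences.
print(np.abs(ref - hyp).max())
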
Thank you for your help!

Hi,

I see that the embedding layer model.embeddings.token_type_embeddings is missing.

It is important to add it and set the corresponding spec arguments:

            num_source_embeddings=2,
            embeddings_merge=common_spec.EmbeddingsMerge.ADD,
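
Applied to your loader, the change would look roughly like this (a sketch; everything else in get_model_spec stays the same):

        encoder_spec = transformer_spec.TransformerEncoderSpec(
            model.config.num_hidden_layers,
            model.config.num_attention_heads,
            pre_norm=False,
            activation=_SUPPORTED_ACTIVATIONS[model.config.hidden_act],
            layernorm_embedding=True,
            num_source_embeddings=2,  # word + token type embeddings
            embeddings_merge=common_spec.EmbeddingsMerge.ADD,
        )

        # ... unchanged down to the embedding assignments ...

        self.set_embeddings(
            spec.encoder.embeddings[0], model.embeddings.word_embeddings
        )
        self.set_embeddings(
            spec.encoder.embeddings[1], model.embeddings.token_type_embeddings
        )

The ADD merge matches what the Transformers implementation does internally: the word, token type, and position embeddings are summed before going through the embedding LayerNorm.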