I have an encoder module that I converted to CTranslate2 with the following precisions:
- fp32
- fp16
- bf16
- int8
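
For context, the conversion was done roughly like this (reconstructed here with the Python converter API rather than the exact commands I ran; fp32 is the default when no quantization is given, and the output directory names match the script below):

```python
import ctranslate2.converters

# Rough reconstruction of the conversion step (not the exact invocation used).
converter = ctranslate2.converters.TransformersConverter("BAAI/bge-m3")
converter.convert("bge_m3_ctranslate")                                # fp32 (default)
converter.convert("bge_m3_fp16_ctranslate", quantization="float16")
converter.convert("bge_m3_bf16_ctranslate", quantization="bfloat16")
converter.convert("bge_m3_int8_ctranslate", quantization="int8")
```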
Then I run the following code:
```python
import ctranslate2
import numpy as np
import torch
import torch.nn.functional
import transformers
from sentence_transformers import SentenceTransformer


def ctranslate_infer(model_path, tokenizer_name):
    device = "cuda"
    encoder = ctranslate2.Encoder(model_path, device=device)
    tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_name)

    inputs = ["It was good!"]
    tokens = tokenizer(inputs).input_ids
    output = encoder.forward_batch(tokens)

    # Wrap the CTranslate2 StorageView in a torch tensor, then take the
    # first token ([CLS]) of the first sequence as the sentence embedding.
    embeddings = torch.as_tensor(output.last_hidden_state, device=device)
    embeddings = embeddings[0][0]
    print(embeddings.shape)

    # L2-normalize to match what SentenceTransformer produces for this model.
    embeddings = torch.nn.functional.normalize(embeddings.view(1, -1), p=2.0)
    print(embeddings)


def st_infer(model_path):
    inputs = ["It was good!"]
    st_model = SentenceTransformer(model_path)
    print(st_model)
    print(st_model.encode(inputs))


if __name__ == "__main__":
    ctranslate_infer("bge_m3_ctranslate", "BAAI/bge-m3")
    print("-----------------------------------------------------------------------------")
    # st_infer("BAAI/bge-m3")
    ctranslate_infer("bge_m3_fp16_ctranslate", "BAAI/bge-m3")
    print("-----------------------------------------------------------------------------")
    ctranslate_infer("bge_m3_bf16_ctranslate", "BAAI/bge-m3")
    print("-----------------------------------------------------------------------------")
    ctranslate_infer("bge_m3_int8_ctranslate", "BAAI/bge-m3")
    print("-----------------------------------------------------------------------------")
```
The fp32 and fp16 models run fine, but when I run inference with the bf16 model I get the following error:
```
warnings.warn("Can't initialize NVML")
torch.Size([1024])
tensor([[ 0.0102,  0.0150, -0.0657,  ..., -0.0160, -0.0231,  0.0077]],
       device='cuda:0')
-----------------------------------------------------------------------------
torch.Size([1024])
tensor([[ 0.0102,  0.0151, -0.0657,  ..., -0.0160, -0.0232,  0.0077]],
       device='cuda:0', dtype=torch.float16)
-----------------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/rkoy/vectorizerstats/ctranslate/infer.py", line 43, in <module>
    ctranslate_infer("bge_m3_bf16_ctranslate", "BAAI/bge-m3")
  File "/data/rkoy/vectorizerstats/ctranslate/infer.py", line 22, in ctranslate_infer
    embeddings = torch.as_tensor(output.last_hidden_state, device=device)
RuntimeError: Could not infer dtype of ctranslate2._ext.StorageView
```
Environment:
- CTranslate2 version: 4.6.0
- PyTorch version: 2.7.1
- GPU: L40S
My guess is that `torch.as_tensor()` reads the StorageView through `__cuda_array_interface__`, whose typestr format has no encoding for bfloat16, so PyTorch cannot infer the dtype. Am I missing something, or is this a bug in the library?
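
In the meantime, the only workaround I can think of is to cast the output to a supported dtype on the CTranslate2 side before handing it to torch. Below is an untested sketch with a hypothetical helper, `storage_view_to_tensor`; it assumes `StorageView.to()` and `ctranslate2.DataType` behave as the API reference describes:

```python
import ctranslate2
import torch

def storage_view_to_tensor(view: ctranslate2.StorageView, device: str = "cuda") -> torch.Tensor:
    # Hypothetical workaround, untested: bfloat16 has no typestr in
    # __cuda_array_interface__, so cast to float32 inside CTranslate2 first
    # (assuming StorageView.to() works as documented), then let
    # torch.as_tensor() wrap the result as usual.
    if view.dtype == ctranslate2.DataType.bfloat16:
        view = view.to(ctranslate2.DataType.float32)
    return torch.as_tensor(view, device=device)
```

i.e. replacing the `torch.as_tensor(output.last_hidden_state, device=device)` line in `ctranslate_infer` with `storage_view_to_tensor(output.last_hidden_state)`. Is something like this the intended way to handle bf16 encoder outputs, or should `torch.as_tensor()` work on them directly?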