ALBERT Tokenizer¶
Overview¶
This page describes how to use AlbertTokenizer with tensorflow-text.
This tokenizer works in sync with tf.data.Dataset
and is therefore useful for on-the-fly tokenization.
from tf_transformers.models import AlbertTokenizerTFText

# Shared sample batch: note the second entry is an empty string, which the
# tokenizer handles like any other example.
text = ['The following statements are true about sentences in English:',
'',
'A new sentence begins with a capital letter.']

# --- Default usage: ragged output ---------------------------------------
tokenizer = AlbertTokenizerTFText.from_pretrained("albert-base-v2")
# All tokenizers expect a dictionary of inputs
inputs = {'text': text}
ragged_outputs = tokenizer(inputs)  # Ragged Tensor Output

# --- Dynamic Padding: pad each batch to its longest example -------------
tokenizer = AlbertTokenizerTFText.from_pretrained("albert-base-v2", dynamic_padding=True)
inputs = {'text': text}
dynamic_outputs = tokenizer(inputs)  # Dict of tf.Tensor

# --- Static Padding: pack/pad to a fixed model input length -------------
tokenizer = AlbertTokenizerTFText.from_pretrained("albert-base-v2", pack_model_inputs=True)
inputs = {'text': text}
static_outputs = tokenizer(inputs)  # Dict of tf.Tensor

# --- Special tokens: prepend/append [CLS]/[SEP]-style markers -----------
tokenizer = AlbertTokenizerTFText.from_pretrained("albert-base-v2", add_special_tokens=True)