This model's tokenizer is extended with Czech (CS), Slovak (SK) and Polish (PL) accented characters using the following code:
from transformers import (
AutoModel,
AutoTokenizer,
)
# Load the base checkpoint whose tokenizer and embeddings will be extended.
model_id = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)

# Lowercase accented letters per language. Letters shared between languages
# appear more than once here; the set conversion below removes duplicates.
accents = "áčďéěíňóřšťúůýž"  # CS
accents += "ąćęłńóśźż"  # PL
accents += "áäčďéíĺľňóôŕšťúýž"  # SK
accents += accents.upper()  # also cover the uppercase variants
accents = set(accents)

# Keep only the characters that are not already keys of the tokenizer's
# vocabulary. get_vocab() is the documented accessor for the token -> id map.
new_tokens = accents - set(tokenizer.get_vocab())

# Sort before adding: iteration order of a set of strings can change between
# interpreter runs (hash randomization), which would assign different IDs to
# the new tokens each time this script is executed.
tokenizer.add_tokens(sorted(new_tokens))

# Resize the model's token embeddings to the extended tokenizer length.
model.resize_token_embeddings(len(tokenizer))