Bidirectional Tajik-Farsi transliterator
Developed as a semester project at URFU, commissioned by NAUMEN.
Authors:
- Shamil Musin
- Julia Krasilnikova
- Olga Petukhova
How to use:
!pip install transformers sentencepiece --quiet
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Name of the pretrained checkpoint; the tokenizer and the model are both
# loaded from it so that they stay in sync.
model_name = "sml-msn/pst5-tg-fa-bidirectional"
# Module-level tokenizer and model; `translit` reads these globals directly.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
def translit(x, **kwargs):
    """Transliterate ``x`` using the module-level ``tokenizer`` and ``model``.

    The input is tokenized into PyTorch tensors, moved to the device the
    model is on, and passed to ``model.generate``.

    Args:
        x: Text to transliterate.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``model.generate`` (for example ``max_length``).

    Returns:
        The first generated sequence decoded to a string, with special
        tokens skipped.
    """
    encoded = tokenizer(x, return_tensors='pt')
    encoded = encoded.to(model.device)
    # Generation is inference only, so gradient tracking is switched off.
    with torch.no_grad():
        generated = model.generate(**encoded, **kwargs)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
# Demo: run the transliterator once on a Cyrillic-script sample and once on
# an Arabic-script sample, printing a separator line between the results.
cyrillic_sample = 'Салом Ҷаҳон'
arabic_sample = 'سلام جهان'

print(translit(cyrillic_sample, max_length=1024))
print('-------------------')
print(translit(arabic_sample, max_length=1024))