Bidirectional Tajik-Farsi transliterator

Developed as a semester project at URFU, commissioned by NAUMEN.

authors:

How to use:

!pip install transformers sentencepiece --quiet

import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Checkpoint identifier handed to both from_pretrained() calls below, so the
# tokenizer and the model weights come from the same checkpoint.
model_name = "sml-msn/pst5-tg-fa-bidirectional"
# Loaded once at module level; translit() reads these two globals on every call.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def translit(x, **kwargs):
    """Transliterate ``x`` with the module-level tokenizer and model.

    Args:
        x: Input text; handed directly to the tokenizer.
        **kwargs: Forwarded unchanged to ``model.generate`` (for example
            ``max_length``).

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Tokenize to PyTorch tensors, then move them to the device the model is on.
    encoded = tokenizer(x, return_tensors='pt')
    encoded = encoded.to(model.device)

    # No gradients are needed for generation, so tracking is switched off.
    with torch.no_grad():
        generated = model.generate(**encoded, **kwargs)

    # Only the first sequence of the generated batch is decoded and returned.
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Usage demo: the same function is called for both inputs with no direction
# argument; max_length is forwarded to model.generate() through **kwargs.
# Cyrillic-script sample.
print(translit('Салом Ҷаҳон', max_length = 1024))
# Visual separator between the two outputs.
print('-------------------')
# Arabic-script sample.
print(translit('سلام جهان', max_length = 1024))