from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the dialogue model and its tokenizer, and move the model to the GPU.
tokenizer = AutoTokenizer.from_pretrained("TeraSpace/gptlarge_matreshka")
model = AutoModelForCausalLM.from_pretrained("TeraSpace/gptlarge_matreshka").cuda()

# Register the special tokens. If they are already in the vocabulary this only
# marks them as special; if any were actually added, the embeddings would also
# need model.resize_token_embeddings(len(tokenizer)).
tokenizer.add_special_tokens({'bos_token': '<s>', 'eos_token': '</s>', 'pad_token': '<pad>'})

while True:
  # Wrap the user's message in the dialogue format used for training: "- <message>\n-".
  prompt = "- {}\n-".format(input("~| "))
  encoded_prompt = tokenizer.encode(prompt, return_tensors="pt").cuda()

  # Sample a reply, stopping at the end-of-sequence token registered above
  # (instead of the hardcoded IDs 2 and 0).
  out = model.generate(encoded_prompt, max_length=200, do_sample=True, top_k=35, top_p=0.95, temperature=0.8,
                       num_return_sequences=1,
                       eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id)
  for i, tokens in enumerate(out.cpu().tolist(), start=1):
    # Drop the prompt tokens and keep only the generated continuation.
    tokens = tokens[encoded_prompt.shape[1]:]
    text = tokenizer.decode(tokens, skip_special_tokens=True)
    # The reply is the first generated line; str.partition, unlike str.index,
    # does not raise if the model produced no newline before stopping.
    reply = text.partition('\n')[0].strip()
    print('[{}] - {}'.format(i, reply))