import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
import transformers
# Load the slow (SentencePiece) tokenizer for the 'ocisd4/openllama-zh' checkpoint.
# NOTE(review): BOS/EOS insertion is disabled here — presumably the prompt format
# handles special tokens itself; confirm against the model card.
tokenizer = LlamaTokenizer.from_pretrained(
    'ocisd4/openllama-zh',
    add_bos_token=False,
    add_eos_token=False,
    use_auth_token=True,
    use_fast=False)

# device_map='auto' lets accelerate place the model on the available device(s)
# (GPU when present, otherwise CPU).
model = LlamaForCausalLM.from_pretrained(
    'ocisd4/openllama-zh', device_map='auto', use_auth_token=True)

prompt = '關於華碩的傳說'
# Move the token ids onto the model's device: with device_map='auto' the model
# may live on GPU while tokenizer output defaults to CPU, which would raise a
# device-mismatch error inside generate().
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

# Stochastic decoding: top-k + nucleus (top-p) sampling with a mild
# repetition penalty, generating up to 256 new tokens after the prompt.
generation_output = model.generate(
    input_ids=input_ids, max_new_tokens=256,
    do_sample=True, top_k=40, top_p=0.95, temperature=0.7, repetition_penalty=1.08,
)

# Decode and print the full sequence (prompt + continuation).
print(tokenizer.decode(generation_output[0]))

This is a 7B pretrained model, trained from the OpenLLaMA pretrained weights, with a context size of 2048.

New model versions will continue to be released as training progresses.