# Model Card for master-thesis-hell/llama-7b_sft-v5

A supervised fine-tuned (SFT) LLaMA-7B checkpoint, intended to be loaded with the Hugging Face `transformers` LLaMA classes.

## Model Details

## Uses

LLaMA support has not yet been released in a stable `transformers` version (as of 2023/04/12), so please install `transformers` from source.
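The usual way to do this is to install directly from the GitHub main branch:

```bash
pip install git+https://github.com/huggingface/transformers
```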

```python
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig

transformers_model = "master-thesis-hell/llama-7b_sft-v5"
# Load the checkpoint in float16; device_map="auto" places it on the available GPU(s)/CPU.
model = LlamaForCausalLM.from_pretrained(transformers_model, device_map="auto", torch_dtype=torch.float16)
tokenizer = LlamaTokenizer.from_pretrained(transformers_model)

# The special tokens (bos_token, eos_token, pad_token) have already been added to this tokenizer,
# and the LlamaTokenizer parameter `add_bos_token` is set to False.
# If you use the original LlamaTokenizer instead, you have to configure these yourself.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens(
        {
            "eos_token": "</s>",
            "bos_token": "<s>",
            "pad_token": "[PAD]"
        }
    )
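    # If "[PAD]" were genuinely new to the checkpoint's vocabulary, the model's embedding
    # matrix would also need resizing afterwards, e.g. model.resize_token_embeddings(len(tokenizer)).
    # This repo's tokenizer already ships with these tokens, so that step is not needed here.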

def generate_a_response(prompt, generation_config):
    # Wrap the prompt as "<s>{prompt}</s>"; the eos token then acts as the segmenter
    # between the prompt and the model's response.
    segmenter = tokenizer.eos_token
    prompt = tokenizer.bos_token + prompt + segmenter
    input_ids = tokenizer.encode(prompt, return_tensors='pt').cuda()

    beam_output = model.generate(
        input_ids,
        max_length=1024,
        generation_config=generation_config
    )

    # Keep special tokens so the eos segmenter survives decoding, then take the text after it.
    ans = tokenizer.decode(beam_output[0], skip_special_tokens=False)
    return ans.split(segmenter)[1].lstrip()

generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.65,
    num_beams=4,
    no_repeat_ngram_size=7,
)

prompt = "台灣最高的建築物是?"  # "What is the tallest building in Taiwan?"
response = generate_a_response(prompt, generation_config)
print(response)
```
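Note that `GenerationConfig` defaults to `do_sample=False`, so this call runs beam search and the `temperature`/`top_p` values are effectively ignored; set `do_sample=True` in the config if you want sampling to take effect.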