instruction-finetuning

🐃🇹🇭 Buffala-LoRa-TH

Buffala-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the Stanford Alpaca (TH Translated), Wisesignt, WikiTH, Pantip and IAppQ&A dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit the project's website.

Issues and what next?

How to use

import torch
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer


device = "cuda"

tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(
    model,
    "Thaweewat/thai-buffala-lora-7b-v0-1",
    torch_dtype=torch.float16,
)

def generate_prompt(instruction, input=None):

    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{input + get_list_and_snippet(instruction)}
### Response:"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{get_list_and_snippet(instruction)}
### Response:"""

if not LOAD_8BIT:
    model.half()  

model.eval()
if torch.__version__ >= "2" and sys.platform != "win32":
    model = torch.compile(model)


def evaluate(
    instruction,
    input=None,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    max_new_tokens=128,
    **kwargs,
):
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    return output.split("### Response:")[1].strip()

evaluate(instruction = "จงแก้สมการต่อไปนี้ X เท่ากับเท่าไหร่", input="X+Y=15 and Y=7")
""" X = 8 """