## Training procedure

The following `bitsandbytes` quantization config was used during training:
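(The exact values are not recorded in this card. As a rough illustration only, an 8-bit `bitsandbytes` config of the kind commonly paired with LoRA training can be built as sketched below; the specific settings are assumptions, not this adapter's verified training config.)

```python
from transformers import BitsAndBytesConfig

# Illustrative 8-bit quantization config (assumed values, not the
# settings actually used to train this adapter)
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,       # quantize linear layers to int8 at load time
    llm_int8_threshold=6.0,  # outlier threshold for mixed int8/fp16 matmul
)
```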

### Framework versions

## Usage

The snippet below loads the base LLaMA model, applies the LoRA adapter weights, and runs beam-search generation. The base checkpoint name is an assumption (it is not stated in this card); substitute the model this adapter was trained on.

```python
import torch
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

# Generation settings
temperature: float = 0.1
top_p: float = 0.75
top_k: int = 40
num_beams: int = 4
max_new_tokens: int = 128

load_8bit: bool = False
lora_weights: str = "marianna13/alpaca-lora-sum"
base_model: str = "decapoda-research/llama-7b-hf"  # assumed base checkpoint; replace as appropriate

model = LlamaForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=load_8bit,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Apply the LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(
    model,
    lora_weights,
    torch_dtype=torch.float16,
)

tokenizer = LlamaTokenizer.from_pretrained(base_model)

device = "cuda" if torch.cuda.is_available() else "cpu"
prompt = "Summarize the following text:\n\n..."  # example prompt; supply your own text

inputs = tokenizer(prompt, return_tensors="pt")
input_ids = inputs["input_ids"].to(device)

generation_config = GenerationConfig(
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    num_beams=num_beams,
)

with torch.no_grad():
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=max_new_tokens,
    )
```
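
Continuing the snippet above, the generated token ids can be decoded back into text; `generation_output.sequences` is the field `generate` returns when `return_dict_in_generate=True`:

```python
# Decode the best beam back into text
summary = tokenizer.decode(
    generation_output.sequences[0], skip_special_tokens=True
)
print(summary)
```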