Training procedure
Framework versions
- PEFT 0.6.0.dev0

Using LoRA to fine-tune the Bloom-3B model on the SQuAD v2 dataset.

How to use
```python
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git
```
```python
import torch
torch.cuda.is_available()  # sanity check that a GPU is visible

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-3b",
    torch_dtype=torch.float16,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")

print(model)
```
```python
for param in model.parameters():
    param.requires_grad = False  # freeze the model - train adapters later
    if param.ndim == 1:
        # cast the small parameters (e.g. layernorm) to fp32 for stability
        param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
    def forward(self, x):
        return super().forward(x).to(torch.float32)

model.lm_head = CastOutputToFloat(model.lm_head)
```
Helper Function
```python
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
```
```python
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
```
- `r`: the dimension (rank) of the low-rank update matrices
- `lora_alpha`: the scaling factor applied to the low-rank update
- `target_modules`: the module names LoRA is applied to, here the BLOOM attention projection `query_key_value` (see the sketch just below)
- `lora_dropout`: dropout probability of the LoRA layers
- `bias`: which bias parameters to train; can be `"none"`, `"all"`, or `"lora_only"` (here `"none"`, so no biases are trained)
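If you want to confirm which module names match `target_modules`, a hypothetical check (not part of the original notebook) is to scan the loaded model's module names:

```python
# Hypothetical helper: list the modules whose names contain "query_key_value",
# to confirm it is a valid LoRA target for BLOOM.
for name, module in model.named_modules():
    if "query_key_value" in name:
        print(name, type(module).__name__)
```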
```python
model = get_peft_model(model, config)
print_trainable_parameters(model)
```
```python
from datasets import load_dataset

qa_dataset = load_dataset("squad_v2")
```
```python
def create_prompt(context, question, answer):
    if len(answer["text"]) < 1:
        answer = "Cannot Find Answer"
    else:
        answer = answer["text"][0]
    prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{answer}</s>"
    return prompt_template
```
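As a quick sanity check (illustrative, not in the original notebook), you can render one sample to see the prompt format:

```python
# Illustrative only: render the prompt for the first training example.
sample = qa_dataset["train"][0]
print(create_prompt(sample["context"], sample["question"], sample["answers"]))
```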
```python
mapped_qa_dataset = qa_dataset.map(
    lambda samples: tokenizer(create_prompt(samples['context'], samples['question'], samples['answers']))
)

torch.cuda.empty_cache()
```
Train the model on the SQuAD v2 dataset
```python
import transformers

# Trainer parameters: https://huggingface.co/docs/transformers/main_classes/trainer
trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_qa_dataset["train"],
    eval_dataset=mapped_qa_dataset["validation"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        # evaluation_strategy="steps",
        # eval_steps=1000,
        warmup_steps=100,
        max_steps=100,
        learning_rate=2e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
```
```python
# Training arguments (a reference configuration following the original BERT paper)
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="/Content/mod",
    evaluation_strategy="epoch",      # can be "epoch" or "steps"
    learning_rate=2e-5,               # per the original BERT paper
    per_device_train_batch_size=32,   # per the original BERT paper
    per_device_eval_batch_size=32,
    num_train_epochs=3,               # the paper recommends 2-4 epochs
    weight_decay=0.01,
    prediction_loss_only=True,
)
```
For example, if you set evaluation_strategy="steps" and eval_steps=2000 in the TrainingArguments, you will get training and validation loss every 2000 steps. To report at the epoch level instead, set evaluation_strategy="epoch" and logging_strategy="epoch" in the TrainingArguments class.
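A minimal sketch of the steps-based setup described above (the values mirror the explanation, not the run documented in this card):

```python
from transformers import TrainingArguments

# Illustrative values only: report losses every 2000 steps.
args = TrainingArguments(
    output_dir="outputs",
    evaluation_strategy="steps",   # validation loss every eval_steps
    eval_steps=2000,
    logging_strategy="steps",      # training loss every logging_steps
    logging_steps=2000,
)
```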
```python
model.config.use_cache = False  # silence the warnings; please re-enable for inference!
trainer.train()
```
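After training, inference might look roughly like the sketch below (assumed usage, not from the original notebook); note that the KV cache is re-enabled first, as the comment above suggests:

```python
# Assumed inference sketch: re-enable the cache and generate with the same
# prompt template used during training.
model.config.use_cache = True
model.eval()

prompt = (
    "### CONTEXT\nCheese is made from milk.\n\n"
    "### QUESTION\nWhat is cheese made from?\n\n"
    "### ANSWER\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```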
Push to the Hub
```python
HUGGING_FACE_USER_NAME = "aloksingh2130"
model_name = "Bloom-3B-Squad-v2"

model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model_name}", use_auth_token=True)
```
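To use the pushed adapter later, a typical loading pattern (assumed usage, not shown in the original card) is to load the base model and attach the LoRA weights with PEFT:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Assumed loading pattern: base model plus the pushed LoRA adapter.
base_model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-3b", torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")
model = PeftModel.from_pretrained(base_model, "aloksingh2130/Bloom-3B-Squad-v2")
```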