!pip install -q bitsandbytes datasets accelerate loralib !pip install -q git+ git+

import torch

# Notebook sanity check: evaluates to True when PyTorch can see a CUDA device.
torch.cuda.is_available()

import os

# Expose only GPU index 0 to this process.
# NOTE(review): this only takes effect if CUDA has not been initialised yet
# in the running process - confirm the ordering relative to earlier cells.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Load the BLOOM-3B causal language model with half-precision weights;
# device_map="auto" delegates module placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-3b",
    torch_dtype=torch.float16,
    device_map="auto",
)

# The tokenizer is pulled from the separate "bigscience/tokenizer" repository.
tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")


# Freeze every base-model weight; only the adapters added later are trained.
for param in model.parameters():
    param.requires_grad = False
    if param.ndim == 1:
        # 1-D parameters (e.g. layernorm) are small, so storing them in fp32
        # is cheap and improves numerical stability.
        param.data = param.data.to(torch.float32)

# Trade compute for memory: store fewer activations and recompute them
# during the backward pass.
model.gradient_checkpointing_enable()

# Let the input embeddings' outputs require grad so gradients can flow to the
# adapters while the base weights stay frozen.
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward output is cast to float32."""

    def forward(self, x):
        """Run the wrapped modules on ``x`` and return the result as fp32."""
        wrapped_output = super().forward(x)
        return wrapped_output.to(torch.float32)


# Wrap the existing LM head so its output is returned in fp32.
model.lm_head = CastOutputToFloat(model.lm_head)

def print_trainable_parameters(model):
    """Print the trainable and total parameter counts of ``model``.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` provides
            ``numel()`` and a ``requires_grad`` flag.

    The trainable share is printed as a percentage. A model without any
    parameters reports ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the division: an empty model has nothing to take a share of.
    percent = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percent}"
    )

from peft import LoraConfig, get_peft_model

# LoRA adapter settings for causal-LM fine-tuning.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],  # modules that receive adapters
    r=8,                 # rank of the low-rank update matrices
    lora_alpha=16,       # scaling factor for the update
    lora_dropout=0.05,   # dropout probability inside the LoRA layers
    bias="none",         # do not train any bias parameters
)

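`r`: the rank (inner dimension) of the low-rank update matrices.

`lora_alpha`: the scaling factor applied to the low-rank update; the update is scaled by `lora_alpha / r`.

`lora_dropout`: the dropout probability applied in the LoRA layers.

`bias`: which bias parameters are trained: `"none"`, `"all"`, or `"lora_only"`. Set it to `"all"` to train all bias parameters.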

# Attach the LoRA adapters described by `config` to the model, then report
# how many parameters are trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

from datasets import load_dataset

# Fetch the SQuAD v2 question-answering dataset.
qa_dataset = load_dataset("squad_v2")

def create_prompt(context, question, answer):
    """Build the training prompt for a single question-answering sample.

    Args:
        context: Passage text placed under the ``### CONTEXT`` header.
        question: Question text placed under the ``### QUESTION`` header.
        answer: Mapping whose ``"text"`` entry is a sequence of answer
            strings. The first entry is used; when the sequence is empty
            the placeholder "Cannot Find Answer" is used instead.

    Returns:
        The formatted prompt string, terminated by ``</s>``.
    """
    candidates = answer["text"]
    answer_text = candidates[0] if len(candidates) >= 1 else "Cannot Find Answer"
    return (
        f"### CONTEXT\n{context}\n\n"
        f"### QUESTION\n{question}\n\n"
        f"### ANSWER\n{answer_text}</s>"
    )

# Turn each sample into a prompt and tokenize it. The mapped dataset keeps
# the same splits ("train" and "validation" are used by the trainer).
mapped_qa_dataset = qa_dataset.map(
    lambda samples: tokenizer(
        create_prompt(samples['context'], samples['question'], samples['answers'])
    )
)


import transformers

# Assemble the Trainer for LoRA fine-tuning on the tokenized prompts.
trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_qa_dataset["train"],
    eval_dataset=mapped_qa_dataset["validation"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,  # 4 x 4 = 16 samples per optimizer step per device
        # Periodic evaluation is currently switched off:
        # evaluation_strategy="steps",
        # eval_steps=1000,
        # NOTE(review): warmup_steps equals max_steps, so the learning rate
        # would still be warming up when training stops - confirm intended.
        warmup_steps=100,
        max_steps=100,
        learning_rate=2e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',
    ),
    # mlm=False selects causal (next-token) language-modeling collation.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Disable the cache during training to silence the warnings.
# Please re-enable it for inference!
model.config.use_cache = False

trainer.train()

# Destination repository on the Hugging Face Hub: "<user>/<model name>".
HUGGING_FACE_USER_NAME = "aloksingh2130"
model_name = "Bloom-3B-Squad-v2"

# Upload the model using the locally stored Hugging Face credentials.
model.push_to_hub(
    f"{HUGGING_FACE_USER_NAME}/{model_name}",
    use_auth_token=True,
)