Based on quantization of the full Llama2-7b-chat-hf model, used for summarization on a 4K dataset.

from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,                             # quantize weights to 8-bit
    # bnb_4bit_quant_type=bnb_4bit_quant_type,     # 4-bit option left disabled
    bnb_4bit_compute_dtype=compute_dtype,          # compute_dtype defined elsewhere (e.g. torch.float16)
    # bnb_4bit_use_double_quant=use_nested_quant,  # nested quantization left disabled
)
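For reference, here is a minimal sketch of how a config like this is typically passed when loading the model with Hugging Face Transformers; the model_id, compute_dtype, and device_map values below are assumptions for illustration, not taken from the original setup.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-2-7b-chat-hf"   # assumed model identifier
compute_dtype = torch.float16                # assumed compute dtype

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_4bit_compute_dtype=compute_dtype,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",                       # assumed; places layers on available GPUs automatically
)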