Joint Pruning, Quantization and Distillation for MobileBERT/SQuADv1.1 (BERT-large teacher)

Setup

git clone https://github.com/vuiseng9/optimum-intel
cd optimum-intel
git checkout jpqd-mobilebert #commit: 6ef11715ddefd96c67970918d809eea09c8c2e6b
pip install -e .[openvino,nncf]

cd examples/openvino/question-answering/
pip install -r requirements.txt

pip install wandb # optional

Run


NNCFCFG=/path/to/openvino_config.json   # NNCF joint pruning/quantization configuration (see sketch below)
MASTER_PORT=<PORTID>                    # master port for an (optional) distributed launch
RUNID=<RUN_IDENTIFIER>                  # run name (passed to --run_name, picked up by wandb if installed)
OUTDIR=/path/to/saved_model             # output directory for checkpoints and the final model

NTXBLK=15                               # value for --num_tx_block (number of transformer blocks in the student)
NEPOCH=16                               # value for --num_train_epochs
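
The file behind NNCFCFG defines the joint compression scheme: NNCF movement sparsity (structured pruning) running alongside 8-bit quantization while the student is distilled from the BERT-large teacher. The sketch below only illustrates its general shape; the field names follow NNCF's movement-sparsity and quantization schema, but every scope, factor and epoch value here is an illustrative assumption, so use the config shipped with the jpqd-mobilebert branch to reproduce the reference results.

{
    "compression": [
        {
            "algorithm": "movement_sparsity",
            "params": {
                "warmup_start_epoch": 1,
                "warmup_end_epoch": 4,
                "importance_regularization_factor": 0.02,
                "enable_structured_masking": true
            },
            "sparse_structure_by_scopes": [
                {"mode": "block", "sparse_factors": [32, 32], "target_scopes": "{re}.*attention.*"},
                {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*intermediate.*"},
                {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*output.*"}
            ],
            "ignored_scopes": ["{re}.*NNCFEmbedding.*", "{re}.*LayerNorm.*", "{re}.*qa_outputs.*"]
        },
        {
            "algorithm": "quantization",
            "initializer": {
                "range": {"num_init_samples": 300, "type": "mean_min_max"}
            }
        }
    ]
}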

python run_qa.py \
    --dataset_name squad \
    --model_name_or_path google/mobilebert-uncased \
    --num_tx_block $NTXBLK \
    --teacher_model_or_path bert-large-uncased-whole-word-masking-finetuned-squad \
    --distillation_weight 0.9 \
    --distillation_temperature 2 \
    --do_eval \
    --do_train \
    --fp16 \
    --evaluation_strategy steps \
    --eval_steps 250 \
    --learning_rate 1e-4 \
    --warmup_ratio 0.1 \
    --optim adamw_torch \
    --num_train_epochs $NEPOCH \
    --per_device_eval_batch_size 128 \
    --per_device_train_batch_size 32 \
    --max_seq_length 384 \
    --doc_stride 128 \
    --save_steps 500 \
    --logging_steps 1 \
    --overwrite_output_dir \
    --nncf_compression_config $NNCFCFG \
    --run_name $RUNID \
    --output_dir $OUTDIR
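
At the end of training the pruned-and-quantized student is saved to $OUTDIR; with the JPQD workflow in optimum-intel this directory typically also contains an OpenVINO IR export of the model. Assuming that export is present, a minimal inference sketch (the file layout and example inputs are assumptions, not part of this recipe):

import sys

from transformers import AutoTokenizer, pipeline
from optimum.intel.openvino import OVModelForQuestionAnswering

model_dir = sys.argv[1]  # e.g. the $OUTDIR used above

# Load the exported OpenVINO IR and the tokenizer saved alongside it
model = OVModelForQuestionAnswering.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Standard transformers QA pipeline, backed by the OpenVINO runtime
qa = pipeline("question-answering", model=model, tokenizer=tokenizer)
print(qa(question="Where is the Eiffel Tower?", context="The Eiffel Tower is in Paris."))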

Reference Results

Global Step: 44000
F1: 90.336
EM: 83.680
Structured Sparsity (Linear layers): 34.31%
Model Sparsity: 19.43%
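
The sparsity figures refer to the proportion of zeroed weights in the pruned Linear layers and across the whole model, respectively. One way to recompute similar numbers from the saved PyTorch checkpoint is sketched below (an assumption-laden sketch: it presumes pruned weights are materialized as zeros in pytorch_model.bin and identifies Linear weights by a shape heuristic):

import torch

# Hypothetical path inside $OUTDIR; adjust to wherever the checkpoint was saved
state_dict = torch.load("pytorch_model.bin", map_location="cpu")

total = zeros = 0
linear_total = linear_zeros = 0
for name, tensor in state_dict.items():
    if not torch.is_floating_point(tensor):
        continue
    n = tensor.numel()
    z = int((tensor == 0).sum())
    total += n
    zeros += z
    # Heuristic: 2-D floating-point weights outside the embeddings ~ Linear layers
    if tensor.dim() == 2 and "embedding" not in name.lower():
        linear_total += n
        linear_zeros += z

print(f"linear-layer sparsity:  {linear_zeros / max(linear_total, 1):.2%}")
print(f"overall model sparsity: {zeros / max(total, 1):.2%}")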